From d32137e342389b430bdbe4c64f322b2a6446e6a9 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 8 Jul 2025 16:49:52 +0000 Subject: [PATCH 1/2] [NFCI][msan] Add avx512bw-intrinsics, avx512bw-intrinsics-upgrade tests Forked from llvm/test/CodeGen/X86. --- .../X86/avx512bw-intrinsics-upgrade.ll | 7652 +++++++++++++++++ .../X86/avx512bw-intrinsics.ll | 3687 ++++++++ 2 files changed, 11339 insertions(+) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll create mode 100644 llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll new file mode 100644 index 0000000000000..c8e7db81ec753 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll @@ -0,0 +1,7652 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s +; +; Forked from llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +; +; Strictly handled: +; - llvm.x86.avx512.dbpsadbw.512 +; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512 +; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512 +; - llvm.x86.avx512.pmaddubs.w.512 +; - llvm.x86.avx512.pmaddw.d.512 +; +; Heuristically handled: +; - llvm.sadd.sat.v32i16, llvm.sadd.sat.v64i8 +; - llvm.smax.v32i16, llvm.smax.v64i8 +; - llvm.smin.v32i16, llvm.smin.v64i8 +; - llvm.ssub.sat.v32i16, llvm.ssub.sat.v64i8 +; - llvm.uadd.sat.v32i16, llvm.uadd.sat.v64i8 +; - llvm.umax.v32i16, llvm.umax.v64i8 +; - llvm.umin.v32i16, llvm.umin.v64i8 +; - llvm.usub.sat.v32i16, llvm.usub.sat.v64i8 +; - llvm.x86.avx512.pavg.b.512, llvm.x86.avx512.pavg.w.512 +; - llvm.x86.avx512.permvar.hi.512 +; - llvm.x86.avx512.pmul.hr.sw.512, llvm.x86.avx512.pmulhu.w.512, llvm.x86.avx512.pmulh.w.512 +; - llvm.x86.avx512.pshuf.b.512 
+; - llvm.x86.avx512.psllv.w.512, llvm.x86.avx512.psrav.w.512, llvm.x86.avx512.psrlv.w.512
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
+
+define i32 @test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) nounwind #0 {
+; CHECK-LABEL: @test_int_x86_avx512_kunpck_wd(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP1]] to <32 x i1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[X0:%.*]] to <32 x i1>
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1>
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32 [[X1:%.*]] to <32 x i1>
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <32 x i1> [[TMP3]], <32 x i1> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <32 x i1> [[TMP4]], <32 x i1> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[_MSPROP1:%.*]] = shufflevector <32 x i1> [[TMP5]], <32 x i1> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <32 x i1> [[TMP6]], <32 x i1> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <16 x i1> [[_MSPROP1]], <16 x i1> [[_MSPROP]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x i1> [[TMP8]], <16 x i1> [[TMP7]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <32 x i1> [[_MSPROP2]] to i32
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <32 x i1> [[TMP9]] to i32
+; CHECK-NEXT:    store i32 [[TMP10]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP11]]
+;
+  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
+  ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
+
+define i64 @test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) nounwind #0 {
+; CHECK-LABEL: @test_int_x86_avx512_kunpck_qd(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64 [[TMP1]] to <64 x i1>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64 [[X0:%.*]] to <64 x i1>
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64 [[TMP2]] to <64 x i1>
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64 [[X1:%.*]] to <64 x i1>
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <64 x i1> [[TMP3]], <64 x i1> [[TMP3]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <64 x i1> [[TMP4]], <64 x i1> [[TMP4]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[_MSPROP1:%.*]] = shufflevector <64 x i1> [[TMP5]], <64 x i1> [[TMP5]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <64 x i1> [[TMP6]], <64 x i1> [[TMP6]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <32 x i1> [[_MSPROP1]], <32 x i1> [[_MSPROP]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <32 x i1> [[TMP8]], <32 x i1> [[TMP7]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <64 x i1> [[_MSPROP2]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <64 x i1> [[TMP9]] to i64
+; CHECK-NEXT:    store i64 [[TMP10]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i64 [[TMP11]]
+;
+  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
+  ret i64 %res
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
+
+define { <64 x i8>, <64 x i8>, <64 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) nounwind #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pbroadcast_b_gpr_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr
@__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <64 x i8> splat (i8 -1), i8 [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <64 x i8> poison, i8 [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP5:%.*]] = shufflevector <64 x i8> [[_MSPROP]], <64 x i8> splat (i8 -1), <64 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <64 x i8> [[DOTSPLATINSERT3]], <64 x i8> poison, <64 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <64 x i8> splat (i8 -1), i8 [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <64 x i8> poison, i8 [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP7:%.*]] = shufflevector <64 x i8> [[_MSPROP6]], <64 x i8> splat (i8 -1), <64 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <64 x i8> [[DOTSPLATINSERT1]], <64 x i8> poison, <64 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <64 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[_MSPROP7]], <64 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <64 x i8> [[DOTSPLAT2]], [[X1:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = or <64 x i8> [[TMP7]], [[_MSPROP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> [[TMP9]], <64 x i8> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[DOTSPLAT2]], <64 x i8> [[X1]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <64 x i8> splat (i8 -1), i8 [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = shufflevector <64 x i8> [[_MSPROP8]], <64 x i8> splat (i8 -1), <64 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <64 x i8> [[DOTSPLATINSERT]], <64 x i8> poison, <64 
x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP2]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <64 x i1> [[TMP12]], <64 x i8> [[_MSPROP9]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = xor <64 x i8> [[DOTSPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[_MSPROP9]] +; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i8> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT10:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP16]], <64 x i8> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP12]], <64 x i8> [[DOTSPLAT]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } { <64 x i8> splat (i8 -1), <64 x i8> splat (i8 -1), <64 x i8> splat (i8 -1) }, <64 x i8> [[_MSPROP5]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> [[DOTSPLAT4]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP18]], <64 x i8> [[_MSPROP_SELECT]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[RES3]], <64 x i8> [[TMP10]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP19]], <64 x i8> [[_MSPROP_SELECT10]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[RES4]], <64 x i8> [[TMP17]], 2 +; CHECK-NEXT: store { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP20]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <64 x i8>, <64 x i8>, <64 x i8> } [[RES5]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask) + %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask) + %res3 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } 
poison, <64 x i8> %res, 0 + %res4 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res3, <64 x i8> %res1, 1 + %res5 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res4, <64 x i8> %res2, 2 + ret { <64 x i8>, <64 x i8>, <64 x i8> } %res5 +} + +declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32) + +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pbroadcast_w_gpr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <32 x i16> splat (i16 -1), i16 [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <32 x i16> poison, i16 [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP5:%.*]] = shufflevector <32 x i16> [[_MSPROP]], <32 x i16> splat (i16 -1), <32 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT3]], <32 x i16> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <32 x i16> splat (i16 -1), i16 [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <32 x i16> poison, i16 [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP7:%.*]] = shufflevector <32 x i16> [[_MSPROP6]], <32 x i16> splat (i16 -1), <32 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT1]], <32 x i16> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <32 x i1> 
[[TMP5]], <32 x i16> [[_MSPROP7]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[DOTSPLAT2]], [[X1:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP7]], [[_MSPROP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP4]], <32 x i16> [[TMP9]], <32 x i16> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[DOTSPLAT2]], <32 x i16> [[X1]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <32 x i16> splat (i16 -1), i16 [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = shufflevector <32 x i16> [[_MSPROP8]], <32 x i16> splat (i16 -1), <32 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[_MSPROP9]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = xor <32 x i16> [[DOTSPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[_MSPROP9]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT10:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP16]], <32 x i16> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[DOTSPLAT]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } { <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1) }, <32 x i16> [[_MSPROP5]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> [[DOTSPLAT4]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP18]], <32 x 
i16> [[_MSPROP_SELECT]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES3]], <32 x i16> [[TMP10]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP19]], <32 x i16> [[_MSPROP_SELECT10]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES4]], <32 x i16> [[TMP17]], 2 +; CHECK-NEXT: store { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP20]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <32 x i16>, <32 x i16>, <32 x i16> } [[RES5]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask) + %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask) + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 + } + +declare void @llvm.x86.avx512.mask.storeu.b.512(ptr, <64 x i8>, i64) + +define void @test_int_x86_avx512_mask_storeu_b_512(ptr %ptr1, ptr %ptr2, <64 x i8> %x1, i64 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_storeu_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast 
i64 [[TMP1]] to <64 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[X2:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR1:%.*]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0(<64 x i8> [[TMP2]], ptr [[TMP9]], i32 1, <64 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <64 x i1> [[TMP5]] to i64 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0(<64 x i8> [[X1:%.*]], ptr [[PTR1]], i32 1, <64 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: store <64 x i8> [[TMP2]], ptr [[TMP17]], align 1 +; CHECK-NEXT: store <64 x i8> [[X1]], ptr [[PTR2]], align 1 +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.storeu.b.512(ptr %ptr1, <64 x i8> %x1, i64 %x2) + call void @llvm.x86.avx512.mask.storeu.b.512(ptr %ptr2, <64 x i8> %x1, i64 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.w.512(ptr, <32 x i16>, i32) + +define void @test_int_x86_avx512_mask_storeu_w_512(ptr %ptr1, ptr %ptr2, <32 x i16> %x1, i32 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_storeu_w_512( 
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR1:%.*]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.masked.store.v32i16.p0(<32 x i16> [[TMP2]], ptr [[TMP9]], i32 1, <32 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i1> [[TMP5]] to i32 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: call void @llvm.masked.store.v32i16.p0(<32 x i16> [[X1:%.*]], ptr [[PTR1]], i32 1, <32 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: store <32 x 
i16> [[TMP2]], ptr [[TMP17]], align 1 +; CHECK-NEXT: store <32 x i16> [[X1]], ptr [[PTR2]], align 1 +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.storeu.w.512(ptr %ptr1, <32 x i16> %x1, i32 %x2) + call void @llvm.x86.avx512.mask.storeu.w.512(ptr %ptr2, <32 x i16> %x1, i32 -1) + ret void +} + +declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(ptr, <32 x i16>, i32) + +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_loadu_w_512(ptr %ptr, ptr %ptr2, <32 x i16> %x1, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_loadu_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = load <32 x i16>, ptr [[PTR:%.*]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD:%.*]] = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr [[TMP14]], i32 1, <32 x i1> [[TMP11]], 
<32 x i16> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <32 x i1> [[TMP10]] to i32 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 16: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr [[PTR2]], i32 1, <32 x i1> [[TMP11]], <32 x i16> [[TMP6]]) +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD1:%.*]] = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr [[TMP23]], i32 1, <32 x i1> [[TMP20]], <32 x i16> zeroinitializer) +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i1> [[TMP19]] to i32 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i32 [[TMP24]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP25:%.*]], label [[TMP26:%.*]], !prof [[PROF1]] +; CHECK: 25: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 26: +; CHECK-NEXT: [[TMP27:%.*]] = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr [[PTR]], i32 1, <32 x i1> [[TMP20]], <32 x i16> zeroinitializer) +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } { <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1) }, <32 x i16> [[_MSLD]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> [[TMP6]], 0 +; CHECK-NEXT: 
[[TMP29:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP28]], <32 x i16> [[_MSMASKEDLD]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES3]], <32 x i16> [[TMP18]], 1 +; CHECK-NEXT: [[TMP30:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP29]], <32 x i16> [[_MSMASKEDLD1]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES4]], <32 x i16> [[TMP27]], 2 +; CHECK-NEXT: store { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP30]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <32 x i16>, <32 x i16>, <32 x i16> } [[RES5]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(ptr %ptr, <32 x i16> %x1, i32 -1) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(ptr %ptr2, <32 x i16> %res, i32 %mask) + %res2 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(ptr %ptr, <32 x i16> zeroinitializer, i32 %mask) + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 +} + +declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(ptr, <64 x i8>, i64) + +define { <64 x i8>, <64 x i8>, <64 x i8> } @test_int_x86_avx512_mask_loadu_b_512(ptr %ptr, ptr %ptr2, <64 x i8> %x1, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_loadu_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], 
label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = load <64 x i8>, ptr [[PTR:%.*]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP2]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr [[TMP14]], i32 1, <64 x i1> [[TMP11]], <64 x i8> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <64 x i1> [[TMP10]] to i64 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 16: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr [[PTR2]], i32 1, <64 x i1> [[TMP11]], <64 x i8> [[TMP6]]) +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i64 [[TMP2]] to <64 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD1:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr [[TMP23]], i32 1, <64 x i1> [[TMP20]], <64 x i8> 
zeroinitializer) +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <64 x i1> [[TMP19]] to i64 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP24]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP25:%.*]], label [[TMP26:%.*]], !prof [[PROF1]] +; CHECK: 25: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 26: +; CHECK-NEXT: [[TMP27:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr [[PTR]], i32 1, <64 x i1> [[TMP20]], <64 x i8> zeroinitializer) +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } { <64 x i8> splat (i8 -1), <64 x i8> splat (i8 -1), <64 x i8> splat (i8 -1) }, <64 x i8> [[_MSLD]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> [[TMP6]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP28]], <64 x i8> [[_MSMASKEDLD]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[RES3]], <64 x i8> [[TMP18]], 1 +; CHECK-NEXT: [[TMP30:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP29]], <64 x i8> [[_MSMASKEDLD1]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } [[RES4]], <64 x i8> [[TMP27]], 2 +; CHECK-NEXT: store { <64 x i8>, <64 x i8>, <64 x i8> } [[TMP30]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <64 x i8>, <64 x i8>, <64 x i8> } [[RES5]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(ptr %ptr, <64 x i8> %x1, i64 -1) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(ptr %ptr2, <64 x i8> %res, i64 %mask) + %res2 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(ptr %ptr, <64 x i8> zeroinitializer, i64 %mask) + %res3 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> %res, 0 + %res4 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res3, <64 x i8> %res1, 1 + %res5 
= insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res4, <64 x i8> %res2, 2
+  ret { <64 x i8>, <64 x i8>, <64 x i8> } %res5
+}
+
+declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
+
+define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) nounwind #0 {
+; CHECK-LABEL: @test_int_x86_avx512_psll_dq_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to <64 x i8>
+; CHECK-NEXT:    [[CAST2:%.*]] = bitcast <8 x i64> [[X0:%.*]] to <64 x i8>
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <64 x i8> zeroinitializer, <64 x i8> [[TMP2]], <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <64 x i8> zeroinitializer, <64 x i8> [[CAST2]], <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <64 x i8> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT:    [[CAST3:%.*]] = bitcast <64 x i8> [[TMP3]] to <8 x i64>
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to <64 x i8>
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <8 x i64> [[X0]] to <64 x i8>
+; CHECK-NEXT:    [[_MSPROP4:%.*]] = shufflevector <64 x i8> zeroinitializer, <64 x i8> [[TMP5]], <64 x i32> <i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <64 x i8> zeroinitializer, <64 x i8> [[CAST]], <64 x i32> <i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123>
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <64 x i8> [[_MSPROP4]] to <8 x i64>
+; CHECK-NEXT:    [[CAST1:%.*]] = bitcast <64 x i8> [[TMP6]] to <8 x i64>
+; CHECK-NEXT:    [[TMP8:%.*]] = insertvalue { <8 x i64>, <8 x i64> } { <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1) }, <8 x i64> [[TMP4]], 0
+; CHECK-NEXT:    [[RES2:%.*]] = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> [[CAST3]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = insertvalue { <8 x i64>, <8 x i64> } [[TMP8]], <8 x i64> [[TMP7]], 1
+; CHECK-NEXT:    [[RES3:%.*]] = insertvalue { <8 x i64>, <8 x i64> } [[RES2]], <8 x i64> [[CAST1]], 1
+; CHECK-NEXT:    store { <8 x i64>, <8 x i64> } [[TMP9]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <8 x i64>, <8 x i64> } [[RES3]]
+;
+  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
+  %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
+  %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0
+  %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1
+  ret { <8 x i64>, <8 x i64> } %res3
+}
+
+define <8 x i64> @test_int_x86_avx512_psll_load_dq_512(ptr %p0) nounwind #0 {
+; CHECK-LABEL: @test_int_x86_avx512_psll_load_dq_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    [[X0:%.*]] = load <8 x i64>, ptr [[P0:%.*]], align 64
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP6]], align 64
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[_MSLD]] to <64 x i8>
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <8 x i64> [[X0]] to <64 x i8>
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <64 x i8> zeroinitializer, <64 x i8> [[TMP7]], <64 x i32> <i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <64 x i8> zeroinitializer, <64 x i8> [[CAST]], <64 x i32> <i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123>
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <64 x i8> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT:    [[CAST1:%.*]] = bitcast <64 x i8> [[TMP8]] to <8 x i64>
+; CHECK-NEXT:    store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <8 x i64> [[CAST1]]
+;
+  %x0 = load <8 x i64>, ptr%p0
+  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
+ +define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_psrl_dq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to <64 x i8> +; CHECK-NEXT: [[CAST2:%.*]] = bitcast <8 x i64> [[X0:%.*]] to <64 x i8> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> zeroinitializer, <64 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <64 x i8> [[CAST2]], <64 x i8> zeroinitializer, <64 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <64 x i8> [[_MSPROP]] to <8 x i64> +; CHECK-NEXT: [[CAST3:%.*]] = bitcast <64 x i8> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to <64 x i8> +; CHECK-NEXT: [[CAST:%.*]] = bitcast <8 x i64> [[X0]] to <64 x i8> +; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <64 x i8> [[TMP5]], <64 x i8> zeroinitializer, <64 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <64 x i8> [[CAST]], <64 x i8> zeroinitializer, <64 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <64 x i8> [[_MSPROP4]] to <8 x i64> +; CHECK-NEXT: [[CAST1:%.*]] = bitcast <64 x i8> [[TMP6]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <8 x i64>, <8 x i64> } { <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1) }, <8 x i64> [[TMP4]], 0 +; CHECK-NEXT: [[RES2:%.*]] = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> [[CAST3]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <8 x i64>, <8 x i64> } [[TMP8]], <8 x i64> [[TMP7]], 1 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <8 x i64>, <8 x i64> } [[RES2]], <8 x i64> [[CAST1]], 1 +; CHECK-NEXT: store { <8 x i64>, <8 x i64> } [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <8 x i64>, <8 x i64> } [[RES3]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8) + %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4) + %res2 = 
insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0 + %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1 + ret { <8 x i64>, <8 x i64> } %res3 +} + +define <8 x i64> @test_int_x86_avx512_psrl_load_dq_512(ptr %p0) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_psrl_load_dq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[X0:%.*]] = load <8 x i64>, ptr [[P0:%.*]], align 64 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP6]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[_MSLD]] to <64 x i8> +; CHECK-NEXT: [[CAST:%.*]] = bitcast <8 x i64> [[X0]] to <64 x i8> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <64 x i8> [[TMP7]], <64 x i8> zeroinitializer, <64 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <64 x i8> [[CAST]], <64 x i8> zeroinitializer, <64 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <64 x i8> [[_MSPROP]] to <8 x i64> +; CHECK-NEXT: [[CAST1:%.*]] = bitcast <64 x i8> [[TMP8]] to <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[CAST1]] +; + %x0 = load <8 x i64>, ptr%p0 + %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4) + ret <8 x i64> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64) + +define <64 x i8> @test_int_x86_avx512_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_palignr_512( 
+; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <64 x i32> +; CHECK-NEXT: [[PALIGNR:%.*]] = shufflevector <64 x i8> [[X1:%.*]], <64 x i8> [[X0:%.*]], <64 x i32> +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[PALIGNR]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_palignr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <64 x i32> +; CHECK-NEXT: [[PALIGNR:%.*]] = shufflevector <64 x i8> [[X1:%.*]], <64 x i8> [[X0:%.*]], <64 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[X4:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <64 x i8> [[PALIGNR]], [[X3:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[_MSPROP]] +; 
CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP10]], <64 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[PALIGNR]], <64 x i8> [[X3]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP11]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4) + ret <64 x i8> %res +} + +define <64 x i8> @test_int_x86_avx512_maskz_palignr_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x4) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_palignr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <64 x i32> +; CHECK-NEXT: [[PALIGNR:%.*]] = shufflevector <64 x i8> [[X1:%.*]], <64 x i8> [[X0:%.*]], <64 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[X4:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <64 x i8> [[PALIGNR]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <64 x i8> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> [[TMP9]], <64 x i8> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[PALIGNR]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> 
[[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP10]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4) + ret <64 x i8> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16>, i32, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pshufh_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> [[TMP1]], <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[X0:%.*]], <32 x i16> [[X0]], <32 x i32> +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP2]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pshufh_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> [[TMP1]], <32 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i16> [[X0:%.*]], <32 x i16> [[X0]], <32 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] 
= select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], [[X2:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_maskz_pshufh_w_512(<32 x i16> %x0, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pshufh_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> [[TMP1]], <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[X0:%.*]], <32 x i16> [[X0]], <32 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP4]], <32 x i16> [[TMP9]], <32 x i16> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP3]], <32 x i16> 
zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP10]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i32, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pshufl_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> [[TMP1]], <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[X0:%.*]], <32 x i16> [[X0]], <32 x i32> +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP2]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pshufl_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> [[TMP1]], <32 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i16> [[X0:%.*]], <32 x i16> [[X0]], <32 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X3:%.*]] 
to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], [[X2:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_maskz_pshufl_w_512(<32 x i16> %x0, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pshufl_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> [[TMP1]], <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[X0:%.*]], <32 x i16> [[X0]], <32 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP4]], <32 x i16> [[TMP9]], <32 x i16> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[TMP5]], <32 
x i16> [[TMP3]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP10]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3) + ret <32 x i16> %res +} + +define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) nounwind #0 { +; CHECK-LABEL: @test_pcmpeq_b( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <64 x i8> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <64 x i8> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <64 x i8> [[TMP4]], splat (i8 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <64 x i8> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <64 x i8> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <64 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <64 x i8> [[A]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <64 x i1> [[_MSPROP_ICMP]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <64 x i1> [[TMP9]] to i64 +; CHECK-NEXT: store i64 [[TMP10]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP11]] +; + %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) + ret i64 %res +} + +define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_pcmpeq_b( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <64 x i8> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <64 x i8> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <64 x i8> [[TMP5]], splat (i8 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <64 x i8> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <64 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <64 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <64 x i8> [[A]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <64 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <64 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <64 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i1> [[TMP17]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <64 x i1> [[TMP18]] to i64 +; CHECK-NEXT: store i64 [[TMP19]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP20]] +; + %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) + ret i64 %res +} + +declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64) + +define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) nounwind #0 { +; CHECK-LABEL: @test_pcmpeq_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor 
<32 x i16> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <32 x i16> [[TMP4]], splat (i16 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i16> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <32 x i16> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <32 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i16> [[A]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i1> [[_MSPROP_ICMP]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i1> [[TMP9]] to i32 +; CHECK-NEXT: store i32 [[TMP10]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP11]] +; + %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) + ret i32 %res +} + +define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_pcmpeq_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <32 x i16> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[TMP5]], splat (i16 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i16> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <32 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i16> [[A]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; 
CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <32 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <32 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i1> [[TMP17]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <32 x i1> [[TMP18]] to i32 +; CHECK-NEXT: store i32 [[TMP19]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP20]] +; + %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32) + +define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) nounwind #0 { +; CHECK-LABEL: @test_pcmpgt_b( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <64 x i8> [[A:%.*]], splat (i8 -128) +; CHECK-NEXT: [[TMP4:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP5:%.*]] = and <64 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = or <64 x i8> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <64 x i8> [[B:%.*]], splat (i8 -128) +; CHECK-NEXT: [[TMP8:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP9:%.*]] = and <64 x i8> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP7]], [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt <64 x i8> [[TMP5]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <64 x i8> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i1> [[TMP11]], [[TMP12]] +; 
CHECK-NEXT: [[TMP14:%.*]] = icmp sgt <64 x i8> [[A]], [[B]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <64 x i1> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <64 x i1> [[TMP14]] to i64 +; CHECK-NEXT: store i64 [[TMP15]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP16]] +; + %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) + ret i64 %res +} + +define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_pcmpgt_b( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <64 x i8> [[A:%.*]], splat (i8 -128) +; CHECK-NEXT: [[TMP5:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP6:%.*]] = and <64 x i8> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = or <64 x i8> [[TMP4]], [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <64 x i8> [[B:%.*]], splat (i8 -128) +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP10:%.*]] = and <64 x i8> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP8]], [[TMP2]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <64 x i8> [[TMP6]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt <64 x i8> [[TMP7]], [[TMP10]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <64 x i1> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <64 x i8> [[A]], [[B]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = and <64 x i1> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = and <64 x i1> [[TMP15]], [[TMP16]] +; CHECK-NEXT: 
[[TMP20:%.*]] = and <64 x i1> [[TMP14]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = or <64 x i1> [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = or <64 x i1> [[TMP21]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = and <64 x i1> [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <64 x i1> [[TMP22]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <64 x i1> [[TMP23]] to i64 +; CHECK-NEXT: store i64 [[TMP24]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP25]] +; + %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) + ret i64 %res +} + +declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64) + +define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) nounwind #0 { +; CHECK-LABEL: @test_pcmpgt_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <32 x i16> [[A:%.*]], splat (i16 -32768) +; CHECK-NEXT: [[TMP4:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP5:%.*]] = and <32 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = or <32 x i16> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[B:%.*]], splat (i16 -32768) +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP9:%.*]] = and <32 x i16> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP7]], [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt <32 x i16> [[TMP5]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <32 x i16> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i1> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt <32 x i16> [[A]], [[B]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <32 x i1> [[TMP13]] to i32 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <32 x i1> [[TMP14]] to i32 +; CHECK-NEXT: 
store i32 [[TMP15]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP16]] +; + %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) + ret i32 %res +} + +define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_pcmpgt_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <32 x i16> [[A:%.*]], splat (i16 -32768) +; CHECK-NEXT: [[TMP5:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP6:%.*]] = and <32 x i16> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = or <32 x i16> [[TMP4]], [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[B:%.*]], splat (i16 -32768) +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP10:%.*]] = and <32 x i16> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP8]], [[TMP2]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <32 x i16> [[TMP6]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt <32 x i16> [[TMP7]], [[TMP10]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <32 x i1> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <32 x i16> [[A]], [[B]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = and <32 x i1> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = and <32 x i1> [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = and <32 x i1> [[TMP14]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = or <32 x i1> [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP21]], [[TMP20]] 
+; CHECK-NEXT: [[TMP23:%.*]] = and <32 x i1> [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i1> [[TMP22]] to i32 +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <32 x i1> [[TMP23]] to i32 +; CHECK-NEXT: store i32 [[TMP24]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP25]] +; + %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32) + +declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @test_int_x86_avx512_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_punpckhb_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <64 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]], <64 x i32> +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_punpckhb_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), 
align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <64 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]], <64 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[X2]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + ret <64 x i8> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @test_int_x86_avx512_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_punpcklb_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <64 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]], <64 x i32> 
+; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_punpcklb_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <64 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]], <64 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[X2]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, 
<64 x i8> %x1, <64 x i8> %x2, i64 %x3) + ret <64 x i8> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_punpckhw_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> [[TMP2]], <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]], <32 x i32> +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_punpckhw_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> [[TMP2]], <32 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]], <32 x i32> +; 
CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_punpcklw_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> [[TMP2]], <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]], <32 x i32> +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x 
i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_punpcklw_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> [[TMP2]], <32 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]], <32 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @test_int_x86_avx512_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmaxs_b_512( +; 
CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; 
CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[X2]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + ret <64 x i8> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmaxs_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.smax.v32i16(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x 
i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.smax.v32i16(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @test_int_x86_avx512_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmaxu_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.umax.v64i8(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: 
ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.umax.v64i8(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[X2]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + ret <64 x i8> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x 
i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmaxu_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.umax.v32i16(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.umax.v32i16(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> 
[[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @test_int_x86_avx512_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmins_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.smin.v64i8(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.smin.v64i8(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[X2]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + ret <64 x i8> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmins_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> 
[[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.smin.v32i16(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.smin.v32i16(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, 
align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @test_int_x86_avx512_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pminu_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.umin.v64i8(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.umin.v64i8(<64 x i8> [[X0:%.*]], <64 x 
i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[X2]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + ret <64 x i8> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pminu_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.umin.v32i16(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; 
CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.umin.v32i16(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmovzxb_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: 
call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> splat (i8 -1), <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[X0:%.*]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <32 x i8> [[_MSPROP]] to <32 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = zext <32 x i8> [[TMP2]] to <32 x i16> +; CHECK-NEXT: store <32 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovzxb_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> splat (i8 -1), <32 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i8> [[X0:%.*]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <32 x i8> [[_MSPROP]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = zext <32 x i8> [[TMP4]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP1]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X1:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select 
<32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X1]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_maskz_pmovzxb_w_512(<32 x i8> %x0, i32 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovzxb_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> splat (i8 -1), <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i8> [[X0:%.*]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <32 x i8> [[_MSPROP]] to <32 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = zext <32 x i8> [[TMP3]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP1]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> 
@llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmovsxb_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> splat (i8 -1), <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[X0:%.*]], <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> +; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <32 x i8> [[_MSPROP]] to <32 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = sext <32 x i8> [[TMP2]] to <32 x i16> +; CHECK-NEXT: store <32 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovsxb_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> splat (i8 -1), <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i8> [[X0:%.*]], <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> +; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <32 x i8> [[_MSPROP]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = sext <32 x i8> [[TMP4]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] =
bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP1]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X1:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X1]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_maskz_pmovsxb_w_512(<32 x i8> %x0, i32 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovsxb_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> splat (i8 -1), <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i8> [[X0:%.*]], <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> +; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <32 x i8> [[_MSPROP]] to <32 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = sext <32 x i8> [[TMP3]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP1]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP1]] +;
CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_psrl_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[TMP1]], <8 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[X0:%.*]], <8 x i16> [[X1]]) +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP10]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> 
%x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psrl_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[TMP1]], <8 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[X0:%.*]], <8 x i16> [[X1]]) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <32 x i16> [[TMP12]], [[X2:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i16> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP13]], <32 x i16> [[TMP18]], <32 x i16> [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP12]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP19]] +; + 
%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_maskz_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_psrl_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[TMP1]], <8 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[X0:%.*]], <8 x i16> [[X1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP13]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = xor <32 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP17]], <32 x i16> [[TMP14]] +; CHECK-NEXT: [[TMP18:%.*]] = select <32 x i1> [[TMP13]], <32 x i16> [[TMP11]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> 
[[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP18]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32) + +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psrl_wi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[TMP1]], i32 3) +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[X0:%.*]], i32 3) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <32 x i1> [[TMP8]], <32 x i16> [[TMP5]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <32 x i16> [[TMP6]], [[X2:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <32 x i16> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP12]], <32 x i16> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP8]], <32 x i16> [[TMP6]], <32 x i16> [[X2]] +; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[TMP1]], i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> 
[[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[X0]], i32 4) +; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[TMP1]], i32 5) +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[X0]], i32 5) +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32 [[X3]] to <32 x i1> +; CHECK-NEXT: [[TMP22:%.*]] = select <32 x i1> [[TMP21]], <32 x i16> [[TMP18]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = xor <32 x i16> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = or <32 x i16> [[TMP23]], [[TMP18]] +; CHECK-NEXT: [[TMP25:%.*]] = or <32 x i16> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP20]], <32 x i16> [[TMP25]], <32 x i16> [[TMP22]] +; CHECK-NEXT: [[TMP26:%.*]] = select <32 x i1> [[TMP21]], <32 x i16> [[TMP19]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } { <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1) }, <32 x i16> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> [[TMP13]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP27]], <32 x i16> [[TMP15]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES3]], <32 x i16> [[TMP16]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP28]], <32 x i16> [[_MSPROP_SELECT1]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES4]], <32 x i16> [[TMP26]], 2 +; CHECK-NEXT: store { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP29]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <32 x i16>, 
<32 x i16>, <32 x i16> } [[RES5]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 4, <32 x i16> %x2, i32 -1) + %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 5, <32 x i16> zeroinitializer, i32 %x3) + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 +} + +declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_psra_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[TMP1]], <8 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[X0:%.*]], <8 x i16> [[X1]]) +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP10]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x 
i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psra_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[TMP1]], <8 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[X0:%.*]], <8 x i16> [[X1]]) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <32 x i16> [[TMP12]], [[X2:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i16> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP13]], <32 x i16> [[TMP18]], <32 x i16> [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP12]], <32 
x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP19]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_maskz_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_psra_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[TMP1]], <8 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[X0:%.*]], <8 x i16> [[X1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP13]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = xor <32 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP17]], <32 x i16> [[TMP14]] +; 
CHECK-NEXT: [[TMP18:%.*]] = select <32 x i1> [[TMP13]], <32 x i16> [[TMP11]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP18]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32) + +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psra_wi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[TMP1]], i32 3) +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[X0:%.*]], i32 3) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <32 x i1> [[TMP8]], <32 x i16> [[TMP5]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <32 x i16> [[TMP6]], [[X2:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <32 x i16> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP12]], <32 x i16> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP8]], <32 x i16> [[TMP6]], <32 x i16> [[X2]] +; CHECK-NEXT: 
[[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[TMP1]], i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[X0]], i32 4) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32 [[X3]] to <32 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = select <32 x i1> [[TMP18]], <32 x i16> [[TMP15]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <32 x i16> [[TMP20]], [[TMP15]] +; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i16> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP17]], <32 x i16> [[TMP22]], <32 x i16> [[TMP19]] +; CHECK-NEXT: [[TMP23:%.*]] = select <32 x i1> [[TMP18]], <32 x i16> [[TMP16]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[TMP1]], i32 5) +; CHECK-NEXT: [[TMP25:%.*]] = or <32 x i16> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[X0]], i32 5) +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } { <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1) }, <32 x i16> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> [[TMP13]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP27]], <32 x i16> [[_MSPROP_SELECT1]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES3]], <32 x i16> [[TMP23]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP28]], <32 x i16> [[TMP25]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES4]], <32 x i16> [[TMP26]], 2 +; 
CHECK-NEXT: store { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP29]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <32 x i16>, <32 x i16>, <32 x i16> } [[RES5]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3) + %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1) + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 +} + +declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_psll_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[TMP1]], <8 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[X0:%.*]], <8 x i16> [[X1]]) +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, 
align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP10]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psll_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[TMP1]], <8 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[X0:%.*]], <8 x i16> [[X1]]) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <32 x i16> [[TMP12]], [[X2:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i16> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> 
[[TMP13]], <32 x i16> [[TMP18]], <32 x i16> [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP12]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP19]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_maskz_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_psll_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[TMP1]], <8 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[X0:%.*]], <8 x i16> [[X1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP13]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = xor <32 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i16> 
[[TMP16]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP17]], <32 x i16> [[TMP14]] +; CHECK-NEXT: [[TMP18:%.*]] = select <32 x i1> [[TMP13]], <32 x i16> [[TMP11]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP18]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32) + +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psll_wi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> [[TMP1]], i32 3) +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> [[X0:%.*]], i32 3) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <32 x i1> [[TMP8]], <32 x i16> [[TMP5]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <32 x i16> [[TMP6]], [[X2:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <32 x i16> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> 
[[TMP12]], <32 x i16> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP8]], <32 x i16> [[TMP6]], <32 x i16> [[X2]] +; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> [[TMP1]], i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> [[X0]], i32 4) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32 [[X3]] to <32 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = select <32 x i1> [[TMP18]], <32 x i16> [[TMP15]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <32 x i16> [[TMP20]], [[TMP15]] +; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i16> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP17]], <32 x i16> [[TMP22]], <32 x i16> [[TMP19]] +; CHECK-NEXT: [[TMP23:%.*]] = select <32 x i1> [[TMP18]], <32 x i16> [[TMP16]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> [[TMP1]], i32 5) +; CHECK-NEXT: [[TMP25:%.*]] = or <32 x i16> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> [[X0]], i32 5) +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } { <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1) }, <32 x i16> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> [[TMP13]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP27]], <32 x i16> [[_MSPROP_SELECT1]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES3]], <32 x i16> [[TMP23]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP28]], 
<32 x i16> [[TMP25]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES4]], <32 x i16> [[TMP26]], 2 +; CHECK-NEXT: store { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP29]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <32 x i16>, <32 x i16>, <32 x i16> } [[RES5]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3) + %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1) + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 +} + +declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pshuf_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> 
%x1, <64 x i8> %x2, i64 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[X2]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + ret <64 x i8> %res +} + + +declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64) + +define <64 x i8> @test_int_x86_avx512_cvtmask2b_512(i64 %x0) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_cvtmask2b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <64 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[X0:%.*]] to <64 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <64 x i1> [[TMP2]] to <64 x i8> +; CHECK-NEXT: [[VPMOVM2:%.*]] = sext <64 x i1> [[TMP3]] to <64 x i8> +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[VPMOVM2]] +; + %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0) + ret <64 x i8> %res +} + +declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32) + +define <32 x i16> @test_int_x86_avx512_cvtmask2w_512(i32 %x0) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_cvtmask2w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[X0:%.*]] to <32 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <32 x i1> [[TMP2]] to <32 x i16> +; CHECK-NEXT: [[VPMOVM2:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16> +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[VPMOVM2]] +; + %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0) + ret <32 x i16> %res +} +define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; 
CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP7]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 
x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> 
@llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP15]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr 
[[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP21]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) nounwind #0 { +; CHECK-LABEL: 
@test_mask_packs_epi32_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or 
<32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP20]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rmb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, 
<16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rmbk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: 
call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], 
<32 x i16> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP21]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rmbkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; 
CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP20]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) + 
+define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP7]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP16]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP15]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: 
[[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr 
@__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> 
[[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP21]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 
[[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP20]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) + + +define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: 
[[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP7]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> 
@llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call 
void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP15]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 
87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP21]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + 
ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> 
zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP20]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rmb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x 
i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rmbk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: 
[[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], 
zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP21]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rmbkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = 
insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP20]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x 
i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) + +define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi16_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP7]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi16_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), 
align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP16]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi16_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: 
[[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP15]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: 
@test_mask_packus_epi16_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi16_rmk_512( +; 
CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; 
CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP21]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_packus_epi16_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: 
[[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP20]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) + +define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind #0 { +; CHECK-LABEL: @test_cmp_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <64 x i8> 
[[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <64 x i8> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <64 x i8> [[TMP4]], splat (i8 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <64 x i8> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <64 x i8> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <64 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <64 x i1> [[_MSPROP_ICMP]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <64 x i1> [[TMP9]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[A0]], splat (i8 -128) +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i8> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP12]], [[TMP1]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <64 x i8> [[A1]], splat (i8 -128) +; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP18:%.*]] = and <64 x i8> [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP16]], [[TMP2]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <64 x i8> [[TMP14]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ult <64 x i8> [[TMP15]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = xor <64 x i1> [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp slt <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <64 x i1> [[TMP22]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <64 x i1> [[TMP23]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP10]], [[TMP24]] +; CHECK-NEXT: [[RET1:%.*]] = add i64 [[TMP11]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = xor <64 x i8> [[A0]], splat (i8 -128) +; CHECK-NEXT: [[TMP27:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP28:%.*]] = and <64 x i8> [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or <64 x i8> [[TMP26]], [[TMP1]] +; CHECK-NEXT: [[TMP30:%.*]] = xor 
<64 x i8> [[A1]], splat (i8 -128) +; CHECK-NEXT: [[TMP31:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP32:%.*]] = and <64 x i8> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = or <64 x i8> [[TMP30]], [[TMP2]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp ule <64 x i8> [[TMP28]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = icmp ule <64 x i8> [[TMP29]], [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = xor <64 x i1> [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = icmp sle <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP38:%.*]] = bitcast <64 x i1> [[TMP36]] to i64 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast <64 x i1> [[TMP37]] to i64 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], [[TMP38]] +; CHECK-NEXT: [[RET2:%.*]] = add i64 [[RET1]], [[TMP39]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP1]], 0 +; CHECK-NEXT: [[RET3:%.*]] = add i64 [[RET2]], 0 +; CHECK-NEXT: [[TMP40:%.*]] = xor <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP41:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <64 x i8> [[TMP41]], zeroinitializer +; CHECK-NEXT: [[TMP43:%.*]] = xor <64 x i8> [[TMP41]], splat (i8 -1) +; CHECK-NEXT: [[TMP44:%.*]] = and <64 x i8> [[TMP43]], [[TMP40]] +; CHECK-NEXT: [[TMP45:%.*]] = icmp eq <64 x i8> [[TMP44]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP3:%.*]] = and <64 x i1> [[TMP42]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP47:%.*]] = bitcast <64 x i1> [[_MSPROP_ICMP3]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = bitcast <64 x i1> [[TMP46]] to i64 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP2]], [[TMP47]] +; CHECK-NEXT: [[RET4:%.*]] = add i64 [[RET3]], [[TMP48]] +; CHECK-NEXT: [[TMP49:%.*]] = xor <64 x i8> [[A0]], splat (i8 -128) +; CHECK-NEXT: [[TMP50:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP51:%.*]] = and <64 x i8> [[TMP49]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = or <64 x i8> [[TMP49]], [[TMP1]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <64 x i8> [[A1]], 
splat (i8 -128) +; CHECK-NEXT: [[TMP54:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP55:%.*]] = and <64 x i8> [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = or <64 x i8> [[TMP53]], [[TMP2]] +; CHECK-NEXT: [[TMP57:%.*]] = icmp uge <64 x i8> [[TMP51]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = icmp uge <64 x i8> [[TMP52]], [[TMP55]] +; CHECK-NEXT: [[TMP59:%.*]] = xor <64 x i1> [[TMP57]], [[TMP58]] +; CHECK-NEXT: [[TMP60:%.*]] = icmp sge <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP61:%.*]] = bitcast <64 x i1> [[TMP59]] to i64 +; CHECK-NEXT: [[TMP62:%.*]] = bitcast <64 x i1> [[TMP60]] to i64 +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i64 [[_MSPROP4]], [[TMP61]] +; CHECK-NEXT: [[RET5:%.*]] = add i64 [[RET4]], [[TMP62]] +; CHECK-NEXT: [[TMP63:%.*]] = xor <64 x i8> [[A0]], splat (i8 -128) +; CHECK-NEXT: [[TMP64:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP65:%.*]] = and <64 x i8> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = or <64 x i8> [[TMP63]], [[TMP1]] +; CHECK-NEXT: [[TMP67:%.*]] = xor <64 x i8> [[A1]], splat (i8 -128) +; CHECK-NEXT: [[TMP68:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP69:%.*]] = and <64 x i8> [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP70:%.*]] = or <64 x i8> [[TMP67]], [[TMP2]] +; CHECK-NEXT: [[TMP71:%.*]] = icmp ugt <64 x i8> [[TMP65]], [[TMP70]] +; CHECK-NEXT: [[TMP72:%.*]] = icmp ugt <64 x i8> [[TMP66]], [[TMP69]] +; CHECK-NEXT: [[TMP73:%.*]] = xor <64 x i1> [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp sgt <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP75:%.*]] = bitcast <64 x i1> [[TMP73]] to i64 +; CHECK-NEXT: [[TMP76:%.*]] = bitcast <64 x i1> [[TMP74]] to i64 +; CHECK-NEXT: [[_MSPROP6:%.*]] = or i64 [[_MSPROP5]], [[TMP75]] +; CHECK-NEXT: [[RET6:%.*]] = add i64 [[RET5]], [[TMP76]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = or i64 [[_MSPROP6]], 0 +; CHECK-NEXT: [[RET7:%.*]] = add i64 [[RET6]], -1 +; CHECK-NEXT: store i64 [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; 
CHECK-NEXT: ret i64 [[RET7]] +; + %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) + %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) + %ret1 = add i64 %res0, %res1 + %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) + %ret2 = add i64 %ret1, %res2 + %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) + %ret3 = add i64 %ret2, %res3 + %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1) + %ret4 = add i64 %ret3, %res4 + %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1) + %ret5 = add i64 %ret4, %res5 + %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1) + %ret6 = add i64 %ret5, %res6 + %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1) + %ret7 = add i64 %ret6, %res7 + ret i64 %ret7 +} + +define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_cmp_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <64 x i8> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <64 x i8> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <64 x i8> [[TMP5]], splat (i8 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <64 x i8> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <64 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: 
[[_MSPROP_ICMP:%.*]] = and <64 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <64 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <64 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <64 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i1> [[TMP17]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <64 x i1> [[TMP18]] to i64 +; CHECK-NEXT: [[TMP21:%.*]] = xor <64 x i8> [[A0]], splat (i8 -128) +; CHECK-NEXT: [[TMP22:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP23:%.*]] = and <64 x i8> [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = or <64 x i8> [[TMP21]], [[TMP1]] +; CHECK-NEXT: [[TMP25:%.*]] = xor <64 x i8> [[A1]], splat (i8 -128) +; CHECK-NEXT: [[TMP26:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP27:%.*]] = and <64 x i8> [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = or <64 x i8> [[TMP25]], [[TMP2]] +; CHECK-NEXT: [[TMP29:%.*]] = icmp ult <64 x i8> [[TMP23]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ult <64 x i8> [[TMP24]], [[TMP27]] +; CHECK-NEXT: [[TMP31:%.*]] = xor <64 x i1> [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp slt <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP35:%.*]] = and <64 x i1> [[TMP31]], [[TMP33]] +; CHECK-NEXT: [[TMP36:%.*]] = and <64 x i1> [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP37:%.*]] = and <64 x i1> [[TMP31]], [[TMP34]] +; CHECK-NEXT: [[TMP38:%.*]] = or <64 x i1> [[TMP35]], [[TMP36]] +; 
CHECK-NEXT: [[TMP39:%.*]] = or <64 x i1> [[TMP38]], [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = and <64 x i1> [[TMP32]], [[TMP34]] +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <64 x i1> [[TMP39]] to i64 +; CHECK-NEXT: [[TMP42:%.*]] = bitcast <64 x i1> [[TMP40]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP19]], [[TMP41]] +; CHECK-NEXT: [[RET1:%.*]] = add i64 [[TMP20]], [[TMP42]] +; CHECK-NEXT: [[TMP43:%.*]] = xor <64 x i8> [[A0]], splat (i8 -128) +; CHECK-NEXT: [[TMP44:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP45:%.*]] = and <64 x i8> [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or <64 x i8> [[TMP43]], [[TMP1]] +; CHECK-NEXT: [[TMP47:%.*]] = xor <64 x i8> [[A1]], splat (i8 -128) +; CHECK-NEXT: [[TMP48:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP49:%.*]] = and <64 x i8> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = or <64 x i8> [[TMP47]], [[TMP2]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp ule <64 x i8> [[TMP45]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = icmp ule <64 x i8> [[TMP46]], [[TMP49]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <64 x i1> [[TMP51]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp sle <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP55:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP56:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP57:%.*]] = and <64 x i1> [[TMP53]], [[TMP55]] +; CHECK-NEXT: [[TMP58:%.*]] = and <64 x i1> [[TMP54]], [[TMP55]] +; CHECK-NEXT: [[TMP59:%.*]] = and <64 x i1> [[TMP53]], [[TMP56]] +; CHECK-NEXT: [[TMP60:%.*]] = or <64 x i1> [[TMP57]], [[TMP58]] +; CHECK-NEXT: [[TMP61:%.*]] = or <64 x i1> [[TMP60]], [[TMP59]] +; CHECK-NEXT: [[TMP62:%.*]] = and <64 x i1> [[TMP54]], [[TMP56]] +; CHECK-NEXT: [[TMP63:%.*]] = bitcast <64 x i1> [[TMP61]] to i64 +; CHECK-NEXT: [[TMP64:%.*]] = bitcast <64 x i1> [[TMP62]] to i64 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], [[TMP63]] +; CHECK-NEXT: [[RET2:%.*]] = add i64 [[RET1]], [[TMP64]] +; CHECK-NEXT: [[TMP65:%.*]] = 
bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP66:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP67:%.*]] = and <64 x i1> zeroinitializer, [[TMP65]] +; CHECK-NEXT: [[TMP68:%.*]] = and <64 x i1> zeroinitializer, [[TMP65]] +; CHECK-NEXT: [[TMP69:%.*]] = and <64 x i1> zeroinitializer, [[TMP66]] +; CHECK-NEXT: [[TMP70:%.*]] = or <64 x i1> [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP71:%.*]] = or <64 x i1> [[TMP70]], [[TMP69]] +; CHECK-NEXT: [[TMP72:%.*]] = and <64 x i1> zeroinitializer, [[TMP66]] +; CHECK-NEXT: [[TMP73:%.*]] = bitcast <64 x i1> [[TMP71]] to i64 +; CHECK-NEXT: [[TMP74:%.*]] = bitcast <64 x i1> [[TMP72]] to i64 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP1]], [[TMP73]] +; CHECK-NEXT: [[RET3:%.*]] = add i64 [[RET2]], [[TMP74]] +; CHECK-NEXT: [[TMP75:%.*]] = xor <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP76:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP77:%.*]] = icmp ne <64 x i8> [[TMP76]], zeroinitializer +; CHECK-NEXT: [[TMP78:%.*]] = xor <64 x i8> [[TMP76]], splat (i8 -1) +; CHECK-NEXT: [[TMP79:%.*]] = and <64 x i8> [[TMP78]], [[TMP75]] +; CHECK-NEXT: [[TMP80:%.*]] = icmp eq <64 x i8> [[TMP79]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP3:%.*]] = and <64 x i1> [[TMP77]], [[TMP80]] +; CHECK-NEXT: [[TMP81:%.*]] = icmp ne <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP82:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP83:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP84:%.*]] = and <64 x i1> [[_MSPROP_ICMP3]], [[TMP82]] +; CHECK-NEXT: [[TMP85:%.*]] = and <64 x i1> [[TMP81]], [[TMP82]] +; CHECK-NEXT: [[TMP86:%.*]] = and <64 x i1> [[_MSPROP_ICMP3]], [[TMP83]] +; CHECK-NEXT: [[TMP87:%.*]] = or <64 x i1> [[TMP84]], [[TMP85]] +; CHECK-NEXT: [[TMP88:%.*]] = or <64 x i1> [[TMP87]], [[TMP86]] +; CHECK-NEXT: [[TMP89:%.*]] = and <64 x i1> [[TMP81]], [[TMP83]] +; CHECK-NEXT: [[TMP90:%.*]] = bitcast <64 x i1> [[TMP88]] to i64 +; CHECK-NEXT: [[TMP91:%.*]] = bitcast <64 x i1> [[TMP89]] to i64 +; 
CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP2]], [[TMP90]] +; CHECK-NEXT: [[RET4:%.*]] = add i64 [[RET3]], [[TMP91]] +; CHECK-NEXT: [[TMP92:%.*]] = xor <64 x i8> [[A0]], splat (i8 -128) +; CHECK-NEXT: [[TMP93:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP94:%.*]] = and <64 x i8> [[TMP92]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = or <64 x i8> [[TMP92]], [[TMP1]] +; CHECK-NEXT: [[TMP96:%.*]] = xor <64 x i8> [[A1]], splat (i8 -128) +; CHECK-NEXT: [[TMP97:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP98:%.*]] = and <64 x i8> [[TMP96]], [[TMP97]] +; CHECK-NEXT: [[TMP99:%.*]] = or <64 x i8> [[TMP96]], [[TMP2]] +; CHECK-NEXT: [[TMP100:%.*]] = icmp uge <64 x i8> [[TMP94]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = icmp uge <64 x i8> [[TMP95]], [[TMP98]] +; CHECK-NEXT: [[TMP102:%.*]] = xor <64 x i1> [[TMP100]], [[TMP101]] +; CHECK-NEXT: [[TMP103:%.*]] = icmp sge <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP104:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP105:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP106:%.*]] = and <64 x i1> [[TMP102]], [[TMP104]] +; CHECK-NEXT: [[TMP107:%.*]] = and <64 x i1> [[TMP103]], [[TMP104]] +; CHECK-NEXT: [[TMP108:%.*]] = and <64 x i1> [[TMP102]], [[TMP105]] +; CHECK-NEXT: [[TMP109:%.*]] = or <64 x i1> [[TMP106]], [[TMP107]] +; CHECK-NEXT: [[TMP110:%.*]] = or <64 x i1> [[TMP109]], [[TMP108]] +; CHECK-NEXT: [[TMP111:%.*]] = and <64 x i1> [[TMP103]], [[TMP105]] +; CHECK-NEXT: [[TMP112:%.*]] = bitcast <64 x i1> [[TMP110]] to i64 +; CHECK-NEXT: [[TMP113:%.*]] = bitcast <64 x i1> [[TMP111]] to i64 +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i64 [[_MSPROP4]], [[TMP112]] +; CHECK-NEXT: [[RET5:%.*]] = add i64 [[RET4]], [[TMP113]] +; CHECK-NEXT: [[TMP114:%.*]] = xor <64 x i8> [[A0]], splat (i8 -128) +; CHECK-NEXT: [[TMP115:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP116:%.*]] = and <64 x i8> [[TMP114]], [[TMP115]] +; CHECK-NEXT: [[TMP117:%.*]] = or <64 x i8> 
[[TMP114]], [[TMP1]] +; CHECK-NEXT: [[TMP118:%.*]] = xor <64 x i8> [[A1]], splat (i8 -128) +; CHECK-NEXT: [[TMP119:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP120:%.*]] = and <64 x i8> [[TMP118]], [[TMP119]] +; CHECK-NEXT: [[TMP121:%.*]] = or <64 x i8> [[TMP118]], [[TMP2]] +; CHECK-NEXT: [[TMP122:%.*]] = icmp ugt <64 x i8> [[TMP116]], [[TMP121]] +; CHECK-NEXT: [[TMP123:%.*]] = icmp ugt <64 x i8> [[TMP117]], [[TMP120]] +; CHECK-NEXT: [[TMP124:%.*]] = xor <64 x i1> [[TMP122]], [[TMP123]] +; CHECK-NEXT: [[TMP125:%.*]] = icmp sgt <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP126:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP127:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP128:%.*]] = and <64 x i1> [[TMP124]], [[TMP126]] +; CHECK-NEXT: [[TMP129:%.*]] = and <64 x i1> [[TMP125]], [[TMP126]] +; CHECK-NEXT: [[TMP130:%.*]] = and <64 x i1> [[TMP124]], [[TMP127]] +; CHECK-NEXT: [[TMP131:%.*]] = or <64 x i1> [[TMP128]], [[TMP129]] +; CHECK-NEXT: [[TMP132:%.*]] = or <64 x i1> [[TMP131]], [[TMP130]] +; CHECK-NEXT: [[TMP133:%.*]] = and <64 x i1> [[TMP125]], [[TMP127]] +; CHECK-NEXT: [[TMP134:%.*]] = bitcast <64 x i1> [[TMP132]] to i64 +; CHECK-NEXT: [[TMP135:%.*]] = bitcast <64 x i1> [[TMP133]] to i64 +; CHECK-NEXT: [[_MSPROP6:%.*]] = or i64 [[_MSPROP5]], [[TMP134]] +; CHECK-NEXT: [[RET6:%.*]] = add i64 [[RET5]], [[TMP135]] +; CHECK-NEXT: [[TMP136:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP137:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP138:%.*]] = and <64 x i1> zeroinitializer, [[TMP136]] +; CHECK-NEXT: [[TMP139:%.*]] = and <64 x i1> splat (i1 true), [[TMP136]] +; CHECK-NEXT: [[TMP140:%.*]] = and <64 x i1> zeroinitializer, [[TMP137]] +; CHECK-NEXT: [[TMP141:%.*]] = or <64 x i1> [[TMP138]], [[TMP139]] +; CHECK-NEXT: [[TMP142:%.*]] = or <64 x i1> [[TMP141]], [[TMP140]] +; CHECK-NEXT: [[TMP143:%.*]] = and <64 x i1> splat (i1 true), [[TMP137]] +; CHECK-NEXT: [[TMP144:%.*]] = bitcast <64 x i1> 
[[TMP142]] to i64 +; CHECK-NEXT: [[TMP145:%.*]] = bitcast <64 x i1> [[TMP143]] to i64 +; CHECK-NEXT: [[_MSPROP7:%.*]] = or i64 [[_MSPROP6]], [[TMP144]] +; CHECK-NEXT: [[RET7:%.*]] = add i64 [[RET6]], [[TMP145]] +; CHECK-NEXT: store i64 [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[RET7]] +; + %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) + %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) + %ret1 = add i64 %res0, %res1 + %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) + %ret2 = add i64 %ret1, %res2 + %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) + %ret3 = add i64 %ret2, %res3 + %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask) + %ret4 = add i64 %ret3, %res4 + %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask) + %ret5 = add i64 %ret4, %res5 + %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask) + %ret6 = add i64 %ret5, %res6 + %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask) + %ret7 = add i64 %ret6, %res7 + ret i64 %ret7 +} + +declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone + +define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind #0 { +; CHECK-LABEL: @test_ucmp_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <64 x i8> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <64 x i8> [[TMP4]], 
zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <64 x i8> [[TMP4]], splat (i8 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <64 x i8> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <64 x i8> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <64 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <64 x i1> [[_MSPROP_ICMP]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <64 x i1> [[TMP9]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP13:%.*]] = and <64 x i8> [[A0]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP16:%.*]] = and <64 x i8> [[A1]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i8> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp ult <64 x i8> [[TMP13]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp ult <64 x i8> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = xor <64 x i1> [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ult <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast <64 x i1> [[TMP20]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <64 x i1> [[TMP21]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP10]], [[TMP22]] +; CHECK-NEXT: [[RET1:%.*]] = add i64 [[TMP11]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP25:%.*]] = and <64 x i8> [[A0]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = or <64 x i8> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP27:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP28:%.*]] = and <64 x i8> [[A1]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or <64 x i8> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ule <64 x i8> [[TMP25]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp ule <64 x i8> [[TMP26]], [[TMP28]] +; CHECK-NEXT: [[TMP32:%.*]] = xor <64 x i1> [[TMP30]], [[TMP31]] 
+; CHECK-NEXT: [[TMP33:%.*]] = icmp ule <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP34:%.*]] = bitcast <64 x i1> [[TMP32]] to i64 +; CHECK-NEXT: [[TMP35:%.*]] = bitcast <64 x i1> [[TMP33]] to i64 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], [[TMP34]] +; CHECK-NEXT: [[RET2:%.*]] = add i64 [[RET1]], [[TMP35]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP1]], 0 +; CHECK-NEXT: [[RET3:%.*]] = add i64 [[RET2]], 0 +; CHECK-NEXT: [[TMP36:%.*]] = xor <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP37:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp ne <64 x i8> [[TMP37]], zeroinitializer +; CHECK-NEXT: [[TMP39:%.*]] = xor <64 x i8> [[TMP37]], splat (i8 -1) +; CHECK-NEXT: [[TMP40:%.*]] = and <64 x i8> [[TMP39]], [[TMP36]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq <64 x i8> [[TMP40]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP3:%.*]] = and <64 x i1> [[TMP38]], [[TMP41]] +; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP43:%.*]] = bitcast <64 x i1> [[_MSPROP_ICMP3]] to i64 +; CHECK-NEXT: [[TMP44:%.*]] = bitcast <64 x i1> [[TMP42]] to i64 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP2]], [[TMP43]] +; CHECK-NEXT: [[RET4:%.*]] = add i64 [[RET3]], [[TMP44]] +; CHECK-NEXT: [[TMP45:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP46:%.*]] = and <64 x i8> [[A0]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = or <64 x i8> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP48:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP49:%.*]] = and <64 x i8> [[A1]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = or <64 x i8> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp uge <64 x i8> [[TMP46]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = icmp uge <64 x i8> [[TMP47]], [[TMP49]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <64 x i1> [[TMP51]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp uge <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <64 x i1> [[TMP53]] to i64 +; CHECK-NEXT: [[TMP56:%.*]] = 
bitcast <64 x i1> [[TMP54]] to i64 +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i64 [[_MSPROP4]], [[TMP55]] +; CHECK-NEXT: [[RET5:%.*]] = add i64 [[RET4]], [[TMP56]] +; CHECK-NEXT: [[TMP57:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP58:%.*]] = and <64 x i8> [[A0]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = or <64 x i8> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP60:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP61:%.*]] = and <64 x i8> [[A1]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = or <64 x i8> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP63:%.*]] = icmp ugt <64 x i8> [[TMP58]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = icmp ugt <64 x i8> [[TMP59]], [[TMP61]] +; CHECK-NEXT: [[TMP65:%.*]] = xor <64 x i1> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = icmp ugt <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP67:%.*]] = bitcast <64 x i1> [[TMP65]] to i64 +; CHECK-NEXT: [[TMP68:%.*]] = bitcast <64 x i1> [[TMP66]] to i64 +; CHECK-NEXT: [[_MSPROP6:%.*]] = or i64 [[_MSPROP5]], [[TMP67]] +; CHECK-NEXT: [[RET6:%.*]] = add i64 [[RET5]], [[TMP68]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = or i64 [[_MSPROP6]], 0 +; CHECK-NEXT: [[RET7:%.*]] = add i64 [[RET6]], -1 +; CHECK-NEXT: store i64 [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[RET7]] +; + %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) + %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) + %ret1 = add i64 %res0, %res1 + %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) + %ret2 = add i64 %ret1, %res2 + %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) + %ret3 = add i64 %ret2, %res3 + %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1) + %ret4 = add i64 %ret3, %res4 + %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1) + %ret5 
= add i64 %ret4, %res5 + %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1) + %ret6 = add i64 %ret5, %res6 + %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1) + %ret7 = add i64 %ret6, %res7 + ret i64 %ret7 +} + +define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_x86_avx512_ucmp_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <64 x i8> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <64 x i8> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <64 x i8> [[TMP5]], splat (i8 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <64 x i8> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <64 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <64 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <64 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <64 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <64 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i1> [[TMP17]] to i64 +; 
CHECK-NEXT: [[TMP20:%.*]] = bitcast <64 x i1> [[TMP18]] to i64 +; CHECK-NEXT: [[TMP21:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP22:%.*]] = and <64 x i8> [[A0]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = or <64 x i8> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP24:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP25:%.*]] = and <64 x i8> [[A1]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = or <64 x i8> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp ult <64 x i8> [[TMP22]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp ult <64 x i8> [[TMP23]], [[TMP25]] +; CHECK-NEXT: [[TMP29:%.*]] = xor <64 x i1> [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ult <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP31:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP33:%.*]] = and <64 x i1> [[TMP29]], [[TMP31]] +; CHECK-NEXT: [[TMP34:%.*]] = and <64 x i1> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and <64 x i1> [[TMP29]], [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = or <64 x i1> [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP37:%.*]] = or <64 x i1> [[TMP36]], [[TMP35]] +; CHECK-NEXT: [[TMP38:%.*]] = and <64 x i1> [[TMP30]], [[TMP32]] +; CHECK-NEXT: [[TMP39:%.*]] = bitcast <64 x i1> [[TMP37]] to i64 +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <64 x i1> [[TMP38]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP19]], [[TMP39]] +; CHECK-NEXT: [[RET1:%.*]] = add i64 [[TMP20]], [[TMP40]] +; CHECK-NEXT: [[TMP41:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP42:%.*]] = and <64 x i8> [[A0]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = or <64 x i8> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP44:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP45:%.*]] = and <64 x i8> [[A1]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or <64 x i8> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP47:%.*]] = icmp ule <64 x i8> [[TMP42]], [[TMP46]] +; CHECK-NEXT: [[TMP48:%.*]] = icmp ule <64 
x i8> [[TMP43]], [[TMP45]] +; CHECK-NEXT: [[TMP49:%.*]] = xor <64 x i1> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = icmp ule <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP51:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP52:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP53:%.*]] = and <64 x i1> [[TMP49]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = and <64 x i1> [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP55:%.*]] = and <64 x i1> [[TMP49]], [[TMP52]] +; CHECK-NEXT: [[TMP56:%.*]] = or <64 x i1> [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP57:%.*]] = or <64 x i1> [[TMP56]], [[TMP55]] +; CHECK-NEXT: [[TMP58:%.*]] = and <64 x i1> [[TMP50]], [[TMP52]] +; CHECK-NEXT: [[TMP59:%.*]] = bitcast <64 x i1> [[TMP57]] to i64 +; CHECK-NEXT: [[TMP60:%.*]] = bitcast <64 x i1> [[TMP58]] to i64 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], [[TMP59]] +; CHECK-NEXT: [[RET2:%.*]] = add i64 [[RET1]], [[TMP60]] +; CHECK-NEXT: [[TMP61:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP62:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP63:%.*]] = and <64 x i1> zeroinitializer, [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = and <64 x i1> zeroinitializer, [[TMP61]] +; CHECK-NEXT: [[TMP65:%.*]] = and <64 x i1> zeroinitializer, [[TMP62]] +; CHECK-NEXT: [[TMP66:%.*]] = or <64 x i1> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP67:%.*]] = or <64 x i1> [[TMP66]], [[TMP65]] +; CHECK-NEXT: [[TMP68:%.*]] = and <64 x i1> zeroinitializer, [[TMP62]] +; CHECK-NEXT: [[TMP69:%.*]] = bitcast <64 x i1> [[TMP67]] to i64 +; CHECK-NEXT: [[TMP70:%.*]] = bitcast <64 x i1> [[TMP68]] to i64 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP1]], [[TMP69]] +; CHECK-NEXT: [[RET3:%.*]] = add i64 [[RET2]], [[TMP70]] +; CHECK-NEXT: [[TMP71:%.*]] = xor <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP72:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp ne <64 x i8> [[TMP72]], zeroinitializer +; CHECK-NEXT: [[TMP74:%.*]] = xor <64 x i8> [[TMP72]], splat 
(i8 -1) +; CHECK-NEXT: [[TMP75:%.*]] = and <64 x i8> [[TMP74]], [[TMP71]] +; CHECK-NEXT: [[TMP76:%.*]] = icmp eq <64 x i8> [[TMP75]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP3:%.*]] = and <64 x i1> [[TMP73]], [[TMP76]] +; CHECK-NEXT: [[TMP77:%.*]] = icmp ne <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP78:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP79:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP80:%.*]] = and <64 x i1> [[_MSPROP_ICMP3]], [[TMP78]] +; CHECK-NEXT: [[TMP81:%.*]] = and <64 x i1> [[TMP77]], [[TMP78]] +; CHECK-NEXT: [[TMP82:%.*]] = and <64 x i1> [[_MSPROP_ICMP3]], [[TMP79]] +; CHECK-NEXT: [[TMP83:%.*]] = or <64 x i1> [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP84:%.*]] = or <64 x i1> [[TMP83]], [[TMP82]] +; CHECK-NEXT: [[TMP85:%.*]] = and <64 x i1> [[TMP77]], [[TMP79]] +; CHECK-NEXT: [[TMP86:%.*]] = bitcast <64 x i1> [[TMP84]] to i64 +; CHECK-NEXT: [[TMP87:%.*]] = bitcast <64 x i1> [[TMP85]] to i64 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP2]], [[TMP86]] +; CHECK-NEXT: [[RET4:%.*]] = add i64 [[RET3]], [[TMP87]] +; CHECK-NEXT: [[TMP88:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP89:%.*]] = and <64 x i8> [[A0]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = or <64 x i8> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP91:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP92:%.*]] = and <64 x i8> [[A1]], [[TMP91]] +; CHECK-NEXT: [[TMP93:%.*]] = or <64 x i8> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP94:%.*]] = icmp uge <64 x i8> [[TMP89]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = icmp uge <64 x i8> [[TMP90]], [[TMP92]] +; CHECK-NEXT: [[TMP96:%.*]] = xor <64 x i1> [[TMP94]], [[TMP95]] +; CHECK-NEXT: [[TMP97:%.*]] = icmp uge <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP98:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP99:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP100:%.*]] = and <64 x i1> [[TMP96]], [[TMP98]] +; CHECK-NEXT: [[TMP101:%.*]] = and <64 x i1> [[TMP97]], 
[[TMP98]] +; CHECK-NEXT: [[TMP102:%.*]] = and <64 x i1> [[TMP96]], [[TMP99]] +; CHECK-NEXT: [[TMP103:%.*]] = or <64 x i1> [[TMP100]], [[TMP101]] +; CHECK-NEXT: [[TMP104:%.*]] = or <64 x i1> [[TMP103]], [[TMP102]] +; CHECK-NEXT: [[TMP105:%.*]] = and <64 x i1> [[TMP97]], [[TMP99]] +; CHECK-NEXT: [[TMP106:%.*]] = bitcast <64 x i1> [[TMP104]] to i64 +; CHECK-NEXT: [[TMP107:%.*]] = bitcast <64 x i1> [[TMP105]] to i64 +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i64 [[_MSPROP4]], [[TMP106]] +; CHECK-NEXT: [[RET5:%.*]] = add i64 [[RET4]], [[TMP107]] +; CHECK-NEXT: [[TMP108:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP109:%.*]] = and <64 x i8> [[A0]], [[TMP108]] +; CHECK-NEXT: [[TMP110:%.*]] = or <64 x i8> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP111:%.*]] = xor <64 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: [[TMP112:%.*]] = and <64 x i8> [[A1]], [[TMP111]] +; CHECK-NEXT: [[TMP113:%.*]] = or <64 x i8> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP114:%.*]] = icmp ugt <64 x i8> [[TMP109]], [[TMP113]] +; CHECK-NEXT: [[TMP115:%.*]] = icmp ugt <64 x i8> [[TMP110]], [[TMP112]] +; CHECK-NEXT: [[TMP116:%.*]] = xor <64 x i1> [[TMP114]], [[TMP115]] +; CHECK-NEXT: [[TMP117:%.*]] = icmp ugt <64 x i8> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP118:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP119:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP120:%.*]] = and <64 x i1> [[TMP116]], [[TMP118]] +; CHECK-NEXT: [[TMP121:%.*]] = and <64 x i1> [[TMP117]], [[TMP118]] +; CHECK-NEXT: [[TMP122:%.*]] = and <64 x i1> [[TMP116]], [[TMP119]] +; CHECK-NEXT: [[TMP123:%.*]] = or <64 x i1> [[TMP120]], [[TMP121]] +; CHECK-NEXT: [[TMP124:%.*]] = or <64 x i1> [[TMP123]], [[TMP122]] +; CHECK-NEXT: [[TMP125:%.*]] = and <64 x i1> [[TMP117]], [[TMP119]] +; CHECK-NEXT: [[TMP126:%.*]] = bitcast <64 x i1> [[TMP124]] to i64 +; CHECK-NEXT: [[TMP127:%.*]] = bitcast <64 x i1> [[TMP125]] to i64 +; CHECK-NEXT: [[_MSPROP6:%.*]] = or i64 [[_MSPROP5]], [[TMP126]] +; CHECK-NEXT: [[RET6:%.*]] = add 
i64 [[RET5]], [[TMP127]] +; CHECK-NEXT: [[TMP128:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP129:%.*]] = bitcast i64 [[MASK]] to <64 x i1> +; CHECK-NEXT: [[TMP130:%.*]] = and <64 x i1> zeroinitializer, [[TMP128]] +; CHECK-NEXT: [[TMP131:%.*]] = and <64 x i1> splat (i1 true), [[TMP128]] +; CHECK-NEXT: [[TMP132:%.*]] = and <64 x i1> zeroinitializer, [[TMP129]] +; CHECK-NEXT: [[TMP133:%.*]] = or <64 x i1> [[TMP130]], [[TMP131]] +; CHECK-NEXT: [[TMP134:%.*]] = or <64 x i1> [[TMP133]], [[TMP132]] +; CHECK-NEXT: [[TMP135:%.*]] = and <64 x i1> splat (i1 true), [[TMP129]] +; CHECK-NEXT: [[TMP136:%.*]] = bitcast <64 x i1> [[TMP134]] to i64 +; CHECK-NEXT: [[TMP137:%.*]] = bitcast <64 x i1> [[TMP135]] to i64 +; CHECK-NEXT: [[_MSPROP7:%.*]] = or i64 [[_MSPROP6]], [[TMP136]] +; CHECK-NEXT: [[RET7:%.*]] = add i64 [[RET6]], [[TMP137]] +; CHECK-NEXT: store i64 [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[RET7]] +; + %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) + %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) + %ret1 = add i64 %res0, %res1 + %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) + %ret2 = add i64 %ret1, %res2 + %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) + %ret3 = add i64 %ret2, %res3 + %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask) + %ret4 = add i64 %ret3, %res4 + %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask) + %ret5 = add i64 %ret4, %res5 + %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask) + %ret6 = add i64 %ret5, %res6 + %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask) + %ret7 = add i64 %ret6, %res7 + ret i64 %ret7 +} + 
+declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone + +define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind #0 { +; CHECK-LABEL: @test_cmp_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <32 x i16> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <32 x i16> [[TMP4]], splat (i16 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i16> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <32 x i16> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <32 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i1> [[_MSPROP_ICMP]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i1> [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[A0]], splat (i16 -32768) +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i16> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP12]], [[TMP1]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <32 x i16> [[A1]], splat (i16 -32768) +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP18:%.*]] = and <32 x i16> [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP16]], [[TMP2]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <32 x i16> [[TMP14]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ult <32 x i16> [[TMP15]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = xor <32 x i1> [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp slt <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: 
[[TMP24:%.*]] = bitcast <32 x i1> [[TMP22]] to i32 +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <32 x i1> [[TMP23]] to i32 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP10]], [[TMP24]] +; CHECK-NEXT: [[RET1:%.*]] = add i32 [[TMP11]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = xor <32 x i16> [[A0]], splat (i16 -32768) +; CHECK-NEXT: [[TMP27:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP28:%.*]] = and <32 x i16> [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or <32 x i16> [[TMP26]], [[TMP1]] +; CHECK-NEXT: [[TMP30:%.*]] = xor <32 x i16> [[A1]], splat (i16 -32768) +; CHECK-NEXT: [[TMP31:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP32:%.*]] = and <32 x i16> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = or <32 x i16> [[TMP30]], [[TMP2]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp ule <32 x i16> [[TMP28]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = icmp ule <32 x i16> [[TMP29]], [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = xor <32 x i1> [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = icmp sle <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP38:%.*]] = bitcast <32 x i1> [[TMP36]] to i32 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast <32 x i1> [[TMP37]] to i32 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[_MSPROP]], [[TMP38]] +; CHECK-NEXT: [[RET2:%.*]] = add i32 [[RET1]], [[TMP39]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[_MSPROP1]], 0 +; CHECK-NEXT: [[RET3:%.*]] = add i32 [[RET2]], 0 +; CHECK-NEXT: [[TMP40:%.*]] = xor <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP41:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <32 x i16> [[TMP41]], zeroinitializer +; CHECK-NEXT: [[TMP43:%.*]] = xor <32 x i16> [[TMP41]], splat (i16 -1) +; CHECK-NEXT: [[TMP44:%.*]] = and <32 x i16> [[TMP43]], [[TMP40]] +; CHECK-NEXT: [[TMP45:%.*]] = icmp eq <32 x i16> [[TMP44]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP3:%.*]] = and <32 x i1> [[TMP42]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: 
[[TMP47:%.*]] = bitcast <32 x i1> [[_MSPROP_ICMP3]] to i32 +; CHECK-NEXT: [[TMP48:%.*]] = bitcast <32 x i1> [[TMP46]] to i32 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP2]], [[TMP47]] +; CHECK-NEXT: [[RET4:%.*]] = add i32 [[RET3]], [[TMP48]] +; CHECK-NEXT: [[TMP49:%.*]] = xor <32 x i16> [[A0]], splat (i16 -32768) +; CHECK-NEXT: [[TMP50:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP51:%.*]] = and <32 x i16> [[TMP49]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = or <32 x i16> [[TMP49]], [[TMP1]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <32 x i16> [[A1]], splat (i16 -32768) +; CHECK-NEXT: [[TMP54:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP55:%.*]] = and <32 x i16> [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = or <32 x i16> [[TMP53]], [[TMP2]] +; CHECK-NEXT: [[TMP57:%.*]] = icmp uge <32 x i16> [[TMP51]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = icmp uge <32 x i16> [[TMP52]], [[TMP55]] +; CHECK-NEXT: [[TMP59:%.*]] = xor <32 x i1> [[TMP57]], [[TMP58]] +; CHECK-NEXT: [[TMP60:%.*]] = icmp sge <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP61:%.*]] = bitcast <32 x i1> [[TMP59]] to i32 +; CHECK-NEXT: [[TMP62:%.*]] = bitcast <32 x i1> [[TMP60]] to i32 +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[TMP61]] +; CHECK-NEXT: [[RET5:%.*]] = add i32 [[RET4]], [[TMP62]] +; CHECK-NEXT: [[TMP63:%.*]] = xor <32 x i16> [[A0]], splat (i16 -32768) +; CHECK-NEXT: [[TMP64:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP65:%.*]] = and <32 x i16> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = or <32 x i16> [[TMP63]], [[TMP1]] +; CHECK-NEXT: [[TMP67:%.*]] = xor <32 x i16> [[A1]], splat (i16 -32768) +; CHECK-NEXT: [[TMP68:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP69:%.*]] = and <32 x i16> [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP70:%.*]] = or <32 x i16> [[TMP67]], [[TMP2]] +; CHECK-NEXT: [[TMP71:%.*]] = icmp ugt <32 x i16> [[TMP65]], [[TMP70]] +; CHECK-NEXT: [[TMP72:%.*]] = icmp ugt <32 
x i16> [[TMP66]], [[TMP69]] +; CHECK-NEXT: [[TMP73:%.*]] = xor <32 x i1> [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp sgt <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP75:%.*]] = bitcast <32 x i1> [[TMP73]] to i32 +; CHECK-NEXT: [[TMP76:%.*]] = bitcast <32 x i1> [[TMP74]] to i32 +; CHECK-NEXT: [[_MSPROP6:%.*]] = or i32 [[_MSPROP5]], [[TMP75]] +; CHECK-NEXT: [[RET6:%.*]] = add i32 [[RET5]], [[TMP76]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = or i32 [[_MSPROP6]], 0 +; CHECK-NEXT: [[RET7:%.*]] = add i32 [[RET6]], -1 +; CHECK-NEXT: store i32 [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RET7]] +; + %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) + %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) + %ret1 = add i32 %res0, %res1 + %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1) + %ret2 = add i32 %ret1, %res2 + %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1) + %ret3 = add i32 %ret2, %res3 + %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1) + %ret4 = add i32 %ret3, %res4 + %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1) + %ret5 = add i32 %ret4, %res5 + %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1) + %ret6 = add i32 %ret5, %res6 + %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1) + %ret7 = add i32 %ret6, %res7 + ret i32 %ret7 +} + +define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_cmp_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; 
CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <32 x i16> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[TMP5]], splat (i16 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i16> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <32 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <32 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <32 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i1> [[TMP17]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <32 x i1> [[TMP18]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = xor <32 x i16> [[A0]], splat (i16 -32768) +; CHECK-NEXT: [[TMP22:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP23:%.*]] = and <32 x i16> [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = or <32 x i16> [[TMP21]], [[TMP1]] +; CHECK-NEXT: [[TMP25:%.*]] = xor <32 x i16> [[A1]], splat (i16 -32768) +; CHECK-NEXT: [[TMP26:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP27:%.*]] = and <32 x i16> [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = or <32 x i16> [[TMP25]], [[TMP2]] +; CHECK-NEXT: [[TMP29:%.*]] = icmp ult <32 x 
i16> [[TMP23]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ult <32 x i16> [[TMP24]], [[TMP27]] +; CHECK-NEXT: [[TMP31:%.*]] = xor <32 x i1> [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp slt <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP35:%.*]] = and <32 x i1> [[TMP31]], [[TMP33]] +; CHECK-NEXT: [[TMP36:%.*]] = and <32 x i1> [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP37:%.*]] = and <32 x i1> [[TMP31]], [[TMP34]] +; CHECK-NEXT: [[TMP38:%.*]] = or <32 x i1> [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP39:%.*]] = or <32 x i1> [[TMP38]], [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = and <32 x i1> [[TMP32]], [[TMP34]] +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <32 x i1> [[TMP39]] to i32 +; CHECK-NEXT: [[TMP42:%.*]] = bitcast <32 x i1> [[TMP40]] to i32 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP19]], [[TMP41]] +; CHECK-NEXT: [[RET1:%.*]] = add i32 [[TMP20]], [[TMP42]] +; CHECK-NEXT: [[TMP43:%.*]] = xor <32 x i16> [[A0]], splat (i16 -32768) +; CHECK-NEXT: [[TMP44:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP45:%.*]] = and <32 x i16> [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or <32 x i16> [[TMP43]], [[TMP1]] +; CHECK-NEXT: [[TMP47:%.*]] = xor <32 x i16> [[A1]], splat (i16 -32768) +; CHECK-NEXT: [[TMP48:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP49:%.*]] = and <32 x i16> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = or <32 x i16> [[TMP47]], [[TMP2]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp ule <32 x i16> [[TMP45]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = icmp ule <32 x i16> [[TMP46]], [[TMP49]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <32 x i1> [[TMP51]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp sle <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP55:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP56:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP57:%.*]] = and 
<32 x i1> [[TMP53]], [[TMP55]] +; CHECK-NEXT: [[TMP58:%.*]] = and <32 x i1> [[TMP54]], [[TMP55]] +; CHECK-NEXT: [[TMP59:%.*]] = and <32 x i1> [[TMP53]], [[TMP56]] +; CHECK-NEXT: [[TMP60:%.*]] = or <32 x i1> [[TMP57]], [[TMP58]] +; CHECK-NEXT: [[TMP61:%.*]] = or <32 x i1> [[TMP60]], [[TMP59]] +; CHECK-NEXT: [[TMP62:%.*]] = and <32 x i1> [[TMP54]], [[TMP56]] +; CHECK-NEXT: [[TMP63:%.*]] = bitcast <32 x i1> [[TMP61]] to i32 +; CHECK-NEXT: [[TMP64:%.*]] = bitcast <32 x i1> [[TMP62]] to i32 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[_MSPROP]], [[TMP63]] +; CHECK-NEXT: [[RET2:%.*]] = add i32 [[RET1]], [[TMP64]] +; CHECK-NEXT: [[TMP65:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP66:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP67:%.*]] = and <32 x i1> zeroinitializer, [[TMP65]] +; CHECK-NEXT: [[TMP68:%.*]] = and <32 x i1> zeroinitializer, [[TMP65]] +; CHECK-NEXT: [[TMP69:%.*]] = and <32 x i1> zeroinitializer, [[TMP66]] +; CHECK-NEXT: [[TMP70:%.*]] = or <32 x i1> [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP71:%.*]] = or <32 x i1> [[TMP70]], [[TMP69]] +; CHECK-NEXT: [[TMP72:%.*]] = and <32 x i1> zeroinitializer, [[TMP66]] +; CHECK-NEXT: [[TMP73:%.*]] = bitcast <32 x i1> [[TMP71]] to i32 +; CHECK-NEXT: [[TMP74:%.*]] = bitcast <32 x i1> [[TMP72]] to i32 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[_MSPROP1]], [[TMP73]] +; CHECK-NEXT: [[RET3:%.*]] = add i32 [[RET2]], [[TMP74]] +; CHECK-NEXT: [[TMP75:%.*]] = xor <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP76:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP77:%.*]] = icmp ne <32 x i16> [[TMP76]], zeroinitializer +; CHECK-NEXT: [[TMP78:%.*]] = xor <32 x i16> [[TMP76]], splat (i16 -1) +; CHECK-NEXT: [[TMP79:%.*]] = and <32 x i16> [[TMP78]], [[TMP75]] +; CHECK-NEXT: [[TMP80:%.*]] = icmp eq <32 x i16> [[TMP79]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP3:%.*]] = and <32 x i1> [[TMP77]], [[TMP80]] +; CHECK-NEXT: [[TMP81:%.*]] = icmp ne <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: 
[[TMP82:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP83:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP84:%.*]] = and <32 x i1> [[_MSPROP_ICMP3]], [[TMP82]] +; CHECK-NEXT: [[TMP85:%.*]] = and <32 x i1> [[TMP81]], [[TMP82]] +; CHECK-NEXT: [[TMP86:%.*]] = and <32 x i1> [[_MSPROP_ICMP3]], [[TMP83]] +; CHECK-NEXT: [[TMP87:%.*]] = or <32 x i1> [[TMP84]], [[TMP85]] +; CHECK-NEXT: [[TMP88:%.*]] = or <32 x i1> [[TMP87]], [[TMP86]] +; CHECK-NEXT: [[TMP89:%.*]] = and <32 x i1> [[TMP81]], [[TMP83]] +; CHECK-NEXT: [[TMP90:%.*]] = bitcast <32 x i1> [[TMP88]] to i32 +; CHECK-NEXT: [[TMP91:%.*]] = bitcast <32 x i1> [[TMP89]] to i32 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP2]], [[TMP90]] +; CHECK-NEXT: [[RET4:%.*]] = add i32 [[RET3]], [[TMP91]] +; CHECK-NEXT: [[TMP92:%.*]] = xor <32 x i16> [[A0]], splat (i16 -32768) +; CHECK-NEXT: [[TMP93:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP94:%.*]] = and <32 x i16> [[TMP92]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = or <32 x i16> [[TMP92]], [[TMP1]] +; CHECK-NEXT: [[TMP96:%.*]] = xor <32 x i16> [[A1]], splat (i16 -32768) +; CHECK-NEXT: [[TMP97:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP98:%.*]] = and <32 x i16> [[TMP96]], [[TMP97]] +; CHECK-NEXT: [[TMP99:%.*]] = or <32 x i16> [[TMP96]], [[TMP2]] +; CHECK-NEXT: [[TMP100:%.*]] = icmp uge <32 x i16> [[TMP94]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = icmp uge <32 x i16> [[TMP95]], [[TMP98]] +; CHECK-NEXT: [[TMP102:%.*]] = xor <32 x i1> [[TMP100]], [[TMP101]] +; CHECK-NEXT: [[TMP103:%.*]] = icmp sge <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP104:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP106:%.*]] = and <32 x i1> [[TMP102]], [[TMP104]] +; CHECK-NEXT: [[TMP107:%.*]] = and <32 x i1> [[TMP103]], [[TMP104]] +; CHECK-NEXT: [[TMP108:%.*]] = and <32 x i1> [[TMP102]], [[TMP105]] +; CHECK-NEXT: [[TMP109:%.*]] = or <32 x i1> 
[[TMP106]], [[TMP107]] +; CHECK-NEXT: [[TMP110:%.*]] = or <32 x i1> [[TMP109]], [[TMP108]] +; CHECK-NEXT: [[TMP111:%.*]] = and <32 x i1> [[TMP103]], [[TMP105]] +; CHECK-NEXT: [[TMP112:%.*]] = bitcast <32 x i1> [[TMP110]] to i32 +; CHECK-NEXT: [[TMP113:%.*]] = bitcast <32 x i1> [[TMP111]] to i32 +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[TMP112]] +; CHECK-NEXT: [[RET5:%.*]] = add i32 [[RET4]], [[TMP113]] +; CHECK-NEXT: [[TMP114:%.*]] = xor <32 x i16> [[A0]], splat (i16 -32768) +; CHECK-NEXT: [[TMP115:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP116:%.*]] = and <32 x i16> [[TMP114]], [[TMP115]] +; CHECK-NEXT: [[TMP117:%.*]] = or <32 x i16> [[TMP114]], [[TMP1]] +; CHECK-NEXT: [[TMP118:%.*]] = xor <32 x i16> [[A1]], splat (i16 -32768) +; CHECK-NEXT: [[TMP119:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP120:%.*]] = and <32 x i16> [[TMP118]], [[TMP119]] +; CHECK-NEXT: [[TMP121:%.*]] = or <32 x i16> [[TMP118]], [[TMP2]] +; CHECK-NEXT: [[TMP122:%.*]] = icmp ugt <32 x i16> [[TMP116]], [[TMP121]] +; CHECK-NEXT: [[TMP123:%.*]] = icmp ugt <32 x i16> [[TMP117]], [[TMP120]] +; CHECK-NEXT: [[TMP124:%.*]] = xor <32 x i1> [[TMP122]], [[TMP123]] +; CHECK-NEXT: [[TMP125:%.*]] = icmp sgt <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP126:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP127:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP128:%.*]] = and <32 x i1> [[TMP124]], [[TMP126]] +; CHECK-NEXT: [[TMP129:%.*]] = and <32 x i1> [[TMP125]], [[TMP126]] +; CHECK-NEXT: [[TMP130:%.*]] = and <32 x i1> [[TMP124]], [[TMP127]] +; CHECK-NEXT: [[TMP131:%.*]] = or <32 x i1> [[TMP128]], [[TMP129]] +; CHECK-NEXT: [[TMP132:%.*]] = or <32 x i1> [[TMP131]], [[TMP130]] +; CHECK-NEXT: [[TMP133:%.*]] = and <32 x i1> [[TMP125]], [[TMP127]] +; CHECK-NEXT: [[TMP134:%.*]] = bitcast <32 x i1> [[TMP132]] to i32 +; CHECK-NEXT: [[TMP135:%.*]] = bitcast <32 x i1> [[TMP133]] to i32 +; CHECK-NEXT: [[_MSPROP6:%.*]] = or i32 
[[_MSPROP5]], [[TMP134]] +; CHECK-NEXT: [[RET6:%.*]] = add i32 [[RET5]], [[TMP135]] +; CHECK-NEXT: [[TMP136:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP137:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP138:%.*]] = and <32 x i1> zeroinitializer, [[TMP136]] +; CHECK-NEXT: [[TMP139:%.*]] = and <32 x i1> splat (i1 true), [[TMP136]] +; CHECK-NEXT: [[TMP140:%.*]] = and <32 x i1> zeroinitializer, [[TMP137]] +; CHECK-NEXT: [[TMP141:%.*]] = or <32 x i1> [[TMP138]], [[TMP139]] +; CHECK-NEXT: [[TMP142:%.*]] = or <32 x i1> [[TMP141]], [[TMP140]] +; CHECK-NEXT: [[TMP143:%.*]] = and <32 x i1> splat (i1 true), [[TMP137]] +; CHECK-NEXT: [[TMP144:%.*]] = bitcast <32 x i1> [[TMP142]] to i32 +; CHECK-NEXT: [[TMP145:%.*]] = bitcast <32 x i1> [[TMP143]] to i32 +; CHECK-NEXT: [[_MSPROP7:%.*]] = or i32 [[_MSPROP6]], [[TMP144]] +; CHECK-NEXT: [[RET7:%.*]] = add i32 [[RET6]], [[TMP145]] +; CHECK-NEXT: store i32 [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RET7]] +; + %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) + %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask) + %ret1 = add i32 %res0, %res1 + %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask) + %ret2 = add i32 %ret1, %res2 + %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask) + %ret3 = add i32 %ret2, %res3 + %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask) + %ret4 = add i32 %ret3, %res4 + %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask) + %ret5 = add i32 %ret4, %res5 + %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask) + %ret6 = add i32 %ret5, %res6 + %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, 
i32 %mask) + %ret7 = add i32 %ret6, %res7 + ret i32 %ret7 +} + +declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone + +define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind #0 { +; CHECK-LABEL: @test_ucmp_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <32 x i16> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <32 x i16> [[TMP4]], splat (i16 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i16> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <32 x i16> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <32 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i1> [[_MSPROP_ICMP]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i1> [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i16> [[A0]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP16:%.*]] = and <32 x i16> [[A1]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i16> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp ult <32 x i16> [[TMP13]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp ult <32 x i16> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = xor <32 x i1> [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ult <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast <32 x i1> [[TMP20]] to i32 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <32 
x i1> [[TMP21]] to i32 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP10]], [[TMP22]] +; CHECK-NEXT: [[RET1:%.*]] = add i32 [[TMP11]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP25:%.*]] = and <32 x i16> [[A0]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = or <32 x i16> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP27:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP28:%.*]] = and <32 x i16> [[A1]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or <32 x i16> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ule <32 x i16> [[TMP25]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp ule <32 x i16> [[TMP26]], [[TMP28]] +; CHECK-NEXT: [[TMP32:%.*]] = xor <32 x i1> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = icmp ule <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP34:%.*]] = bitcast <32 x i1> [[TMP32]] to i32 +; CHECK-NEXT: [[TMP35:%.*]] = bitcast <32 x i1> [[TMP33]] to i32 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[_MSPROP]], [[TMP34]] +; CHECK-NEXT: [[RET2:%.*]] = add i32 [[RET1]], [[TMP35]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[_MSPROP1]], 0 +; CHECK-NEXT: [[RET3:%.*]] = add i32 [[RET2]], 0 +; CHECK-NEXT: [[TMP36:%.*]] = xor <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP37:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp ne <32 x i16> [[TMP37]], zeroinitializer +; CHECK-NEXT: [[TMP39:%.*]] = xor <32 x i16> [[TMP37]], splat (i16 -1) +; CHECK-NEXT: [[TMP40:%.*]] = and <32 x i16> [[TMP39]], [[TMP36]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq <32 x i16> [[TMP40]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP3:%.*]] = and <32 x i1> [[TMP38]], [[TMP41]] +; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP43:%.*]] = bitcast <32 x i1> [[_MSPROP_ICMP3]] to i32 +; CHECK-NEXT: [[TMP44:%.*]] = bitcast <32 x i1> [[TMP42]] to i32 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP2]], [[TMP43]] +; CHECK-NEXT: [[RET4:%.*]] = add i32 [[RET3]], [[TMP44]] +; 
CHECK-NEXT: [[TMP45:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP46:%.*]] = and <32 x i16> [[A0]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = or <32 x i16> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP48:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP49:%.*]] = and <32 x i16> [[A1]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = or <32 x i16> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp uge <32 x i16> [[TMP46]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = icmp uge <32 x i16> [[TMP47]], [[TMP49]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <32 x i1> [[TMP51]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp uge <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <32 x i1> [[TMP53]] to i32 +; CHECK-NEXT: [[TMP56:%.*]] = bitcast <32 x i1> [[TMP54]] to i32 +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[TMP55]] +; CHECK-NEXT: [[RET5:%.*]] = add i32 [[RET4]], [[TMP56]] +; CHECK-NEXT: [[TMP57:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP58:%.*]] = and <32 x i16> [[A0]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = or <32 x i16> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP60:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP61:%.*]] = and <32 x i16> [[A1]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = or <32 x i16> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP63:%.*]] = icmp ugt <32 x i16> [[TMP58]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = icmp ugt <32 x i16> [[TMP59]], [[TMP61]] +; CHECK-NEXT: [[TMP65:%.*]] = xor <32 x i1> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = icmp ugt <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP67:%.*]] = bitcast <32 x i1> [[TMP65]] to i32 +; CHECK-NEXT: [[TMP68:%.*]] = bitcast <32 x i1> [[TMP66]] to i32 +; CHECK-NEXT: [[_MSPROP6:%.*]] = or i32 [[_MSPROP5]], [[TMP67]] +; CHECK-NEXT: [[RET6:%.*]] = add i32 [[RET5]], [[TMP68]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = or i32 [[_MSPROP6]], 0 +; CHECK-NEXT: [[RET7:%.*]] = add i32 [[RET6]], -1 +; CHECK-NEXT: store i32 [[_MSPROP7]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RET7]] +; + %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) + %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) + %ret1 = add i32 %res0, %res1 + %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1) + %ret2 = add i32 %ret1, %res2 + %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1) + %ret3 = add i32 %ret2, %res3 + %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1) + %ret4 = add i32 %ret3, %res4 + %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1) + %ret5 = add i32 %ret4, %res5 + %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1) + %ret6 = add i32 %ret5, %res6 + %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1) + %ret7 = add i32 %ret6, %res7 + ret i32 %ret7 +} + +define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_ucmp_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <32 x i16> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[TMP5]], splat (i16 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i16> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 
x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <32 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <32 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <32 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i1> [[TMP17]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <32 x i1> [[TMP18]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP22:%.*]] = and <32 x i16> [[A0]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = or <32 x i16> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP24:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP25:%.*]] = and <32 x i16> [[A1]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = or <32 x i16> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp ult <32 x i16> [[TMP22]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp ult <32 x i16> [[TMP23]], [[TMP25]] +; CHECK-NEXT: [[TMP29:%.*]] = xor <32 x i1> [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ult <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP31:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP33:%.*]] = and <32 x i1> [[TMP29]], [[TMP31]] +; CHECK-NEXT: [[TMP34:%.*]] = and <32 x i1> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and <32 x i1> [[TMP29]], [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = or <32 x i1> [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP37:%.*]] = or <32 x i1> [[TMP36]], [[TMP35]] +; CHECK-NEXT: [[TMP38:%.*]] 
= and <32 x i1> [[TMP30]], [[TMP32]] +; CHECK-NEXT: [[TMP39:%.*]] = bitcast <32 x i1> [[TMP37]] to i32 +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <32 x i1> [[TMP38]] to i32 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP19]], [[TMP39]] +; CHECK-NEXT: [[RET1:%.*]] = add i32 [[TMP20]], [[TMP40]] +; CHECK-NEXT: [[TMP41:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP42:%.*]] = and <32 x i16> [[A0]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = or <32 x i16> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP44:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP45:%.*]] = and <32 x i16> [[A1]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or <32 x i16> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP47:%.*]] = icmp ule <32 x i16> [[TMP42]], [[TMP46]] +; CHECK-NEXT: [[TMP48:%.*]] = icmp ule <32 x i16> [[TMP43]], [[TMP45]] +; CHECK-NEXT: [[TMP49:%.*]] = xor <32 x i1> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = icmp ule <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP51:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP52:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP53:%.*]] = and <32 x i1> [[TMP49]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = and <32 x i1> [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP55:%.*]] = and <32 x i1> [[TMP49]], [[TMP52]] +; CHECK-NEXT: [[TMP56:%.*]] = or <32 x i1> [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP57:%.*]] = or <32 x i1> [[TMP56]], [[TMP55]] +; CHECK-NEXT: [[TMP58:%.*]] = and <32 x i1> [[TMP50]], [[TMP52]] +; CHECK-NEXT: [[TMP59:%.*]] = bitcast <32 x i1> [[TMP57]] to i32 +; CHECK-NEXT: [[TMP60:%.*]] = bitcast <32 x i1> [[TMP58]] to i32 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[_MSPROP]], [[TMP59]] +; CHECK-NEXT: [[RET2:%.*]] = add i32 [[RET1]], [[TMP60]] +; CHECK-NEXT: [[TMP61:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP62:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP63:%.*]] = and <32 x i1> zeroinitializer, [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = and <32 x i1> 
zeroinitializer, [[TMP61]] +; CHECK-NEXT: [[TMP65:%.*]] = and <32 x i1> zeroinitializer, [[TMP62]] +; CHECK-NEXT: [[TMP66:%.*]] = or <32 x i1> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP67:%.*]] = or <32 x i1> [[TMP66]], [[TMP65]] +; CHECK-NEXT: [[TMP68:%.*]] = and <32 x i1> zeroinitializer, [[TMP62]] +; CHECK-NEXT: [[TMP69:%.*]] = bitcast <32 x i1> [[TMP67]] to i32 +; CHECK-NEXT: [[TMP70:%.*]] = bitcast <32 x i1> [[TMP68]] to i32 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[_MSPROP1]], [[TMP69]] +; CHECK-NEXT: [[RET3:%.*]] = add i32 [[RET2]], [[TMP70]] +; CHECK-NEXT: [[TMP71:%.*]] = xor <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP72:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp ne <32 x i16> [[TMP72]], zeroinitializer +; CHECK-NEXT: [[TMP74:%.*]] = xor <32 x i16> [[TMP72]], splat (i16 -1) +; CHECK-NEXT: [[TMP75:%.*]] = and <32 x i16> [[TMP74]], [[TMP71]] +; CHECK-NEXT: [[TMP76:%.*]] = icmp eq <32 x i16> [[TMP75]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP3:%.*]] = and <32 x i1> [[TMP73]], [[TMP76]] +; CHECK-NEXT: [[TMP77:%.*]] = icmp ne <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP78:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP79:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP80:%.*]] = and <32 x i1> [[_MSPROP_ICMP3]], [[TMP78]] +; CHECK-NEXT: [[TMP81:%.*]] = and <32 x i1> [[TMP77]], [[TMP78]] +; CHECK-NEXT: [[TMP82:%.*]] = and <32 x i1> [[_MSPROP_ICMP3]], [[TMP79]] +; CHECK-NEXT: [[TMP83:%.*]] = or <32 x i1> [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP84:%.*]] = or <32 x i1> [[TMP83]], [[TMP82]] +; CHECK-NEXT: [[TMP85:%.*]] = and <32 x i1> [[TMP77]], [[TMP79]] +; CHECK-NEXT: [[TMP86:%.*]] = bitcast <32 x i1> [[TMP84]] to i32 +; CHECK-NEXT: [[TMP87:%.*]] = bitcast <32 x i1> [[TMP85]] to i32 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP2]], [[TMP86]] +; CHECK-NEXT: [[RET4:%.*]] = add i32 [[RET3]], [[TMP87]] +; CHECK-NEXT: [[TMP88:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: 
[[TMP89:%.*]] = and <32 x i16> [[A0]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = or <32 x i16> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP91:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP92:%.*]] = and <32 x i16> [[A1]], [[TMP91]] +; CHECK-NEXT: [[TMP93:%.*]] = or <32 x i16> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP94:%.*]] = icmp uge <32 x i16> [[TMP89]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = icmp uge <32 x i16> [[TMP90]], [[TMP92]] +; CHECK-NEXT: [[TMP96:%.*]] = xor <32 x i1> [[TMP94]], [[TMP95]] +; CHECK-NEXT: [[TMP97:%.*]] = icmp uge <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP98:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP99:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP100:%.*]] = and <32 x i1> [[TMP96]], [[TMP98]] +; CHECK-NEXT: [[TMP101:%.*]] = and <32 x i1> [[TMP97]], [[TMP98]] +; CHECK-NEXT: [[TMP102:%.*]] = and <32 x i1> [[TMP96]], [[TMP99]] +; CHECK-NEXT: [[TMP103:%.*]] = or <32 x i1> [[TMP100]], [[TMP101]] +; CHECK-NEXT: [[TMP104:%.*]] = or <32 x i1> [[TMP103]], [[TMP102]] +; CHECK-NEXT: [[TMP105:%.*]] = and <32 x i1> [[TMP97]], [[TMP99]] +; CHECK-NEXT: [[TMP106:%.*]] = bitcast <32 x i1> [[TMP104]] to i32 +; CHECK-NEXT: [[TMP107:%.*]] = bitcast <32 x i1> [[TMP105]] to i32 +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[TMP106]] +; CHECK-NEXT: [[RET5:%.*]] = add i32 [[RET4]], [[TMP107]] +; CHECK-NEXT: [[TMP108:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP109:%.*]] = and <32 x i16> [[A0]], [[TMP108]] +; CHECK-NEXT: [[TMP110:%.*]] = or <32 x i16> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP111:%.*]] = xor <32 x i16> [[TMP2]], splat (i16 -1) +; CHECK-NEXT: [[TMP112:%.*]] = and <32 x i16> [[A1]], [[TMP111]] +; CHECK-NEXT: [[TMP113:%.*]] = or <32 x i16> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP114:%.*]] = icmp ugt <32 x i16> [[TMP109]], [[TMP113]] +; CHECK-NEXT: [[TMP115:%.*]] = icmp ugt <32 x i16> [[TMP110]], [[TMP112]] +; CHECK-NEXT: [[TMP116:%.*]] = xor <32 x i1> [[TMP114]], [[TMP115]] 
+; CHECK-NEXT: [[TMP117:%.*]] = icmp ugt <32 x i16> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP118:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP119:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP120:%.*]] = and <32 x i1> [[TMP116]], [[TMP118]] +; CHECK-NEXT: [[TMP121:%.*]] = and <32 x i1> [[TMP117]], [[TMP118]] +; CHECK-NEXT: [[TMP122:%.*]] = and <32 x i1> [[TMP116]], [[TMP119]] +; CHECK-NEXT: [[TMP123:%.*]] = or <32 x i1> [[TMP120]], [[TMP121]] +; CHECK-NEXT: [[TMP124:%.*]] = or <32 x i1> [[TMP123]], [[TMP122]] +; CHECK-NEXT: [[TMP125:%.*]] = and <32 x i1> [[TMP117]], [[TMP119]] +; CHECK-NEXT: [[TMP126:%.*]] = bitcast <32 x i1> [[TMP124]] to i32 +; CHECK-NEXT: [[TMP127:%.*]] = bitcast <32 x i1> [[TMP125]] to i32 +; CHECK-NEXT: [[_MSPROP6:%.*]] = or i32 [[_MSPROP5]], [[TMP126]] +; CHECK-NEXT: [[RET6:%.*]] = add i32 [[RET5]], [[TMP127]] +; CHECK-NEXT: [[TMP128:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP129:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP130:%.*]] = and <32 x i1> zeroinitializer, [[TMP128]] +; CHECK-NEXT: [[TMP131:%.*]] = and <32 x i1> splat (i1 true), [[TMP128]] +; CHECK-NEXT: [[TMP132:%.*]] = and <32 x i1> zeroinitializer, [[TMP129]] +; CHECK-NEXT: [[TMP133:%.*]] = or <32 x i1> [[TMP130]], [[TMP131]] +; CHECK-NEXT: [[TMP134:%.*]] = or <32 x i1> [[TMP133]], [[TMP132]] +; CHECK-NEXT: [[TMP135:%.*]] = and <32 x i1> splat (i1 true), [[TMP129]] +; CHECK-NEXT: [[TMP136:%.*]] = bitcast <32 x i1> [[TMP134]] to i32 +; CHECK-NEXT: [[TMP137:%.*]] = bitcast <32 x i1> [[TMP135]] to i32 +; CHECK-NEXT: [[_MSPROP7:%.*]] = or i32 [[_MSPROP6]], [[TMP136]] +; CHECK-NEXT: [[RET7:%.*]] = add i32 [[RET6]], [[TMP137]] +; CHECK-NEXT: store i32 [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RET7]] +; + %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) + %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 
%mask) + %ret1 = add i32 %res0, %res1 + %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask) + %ret2 = add i32 %ret1, %res2 + %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask) + %ret3 = add i32 %ret2, %res3 + %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask) + %ret4 = add i32 %ret3, %res4 + %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask) + %ret5 = add i32 %ret4, %res5 + %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask) + %ret6 = add i32 %ret5, %res6 + %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask) + %ret7 = add i32 %ret6, %res7 + ret i32 %ret7 +} + +declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone + + +declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @mm512_avg_epu8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind #0 { +; CHECK-LABEL: @mm512_avg_epu8( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @mm512_mask_avg_epu8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind #0 { +; 
CHECK-LABEL: @mm512_mask_avg_epu8( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[X2]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + ret <64 x i8> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @mm512_avg_epu16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @mm512_avg_epu16( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, 
ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @mm512_mask_avg_epu16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @mm512_mask_avg_epu16( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; 
CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pabs_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> [[X0:%.*]], i1 false) +; CHECK-NEXT: store <32 x i16> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP2]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> [[X0:%.*]], i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP1]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], 
[[X1:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> [[X1]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) + ret <32 x i16> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64) + +define <64 x i8> @test_int_x86_avx512_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pabs_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> [[X0:%.*]], i1 false) +; CHECK-NEXT: store <64 x i8> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP2]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> [[X0:%.*]], i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP2]] to <64 x i1> +; CHECK-NEXT: 
[[TMP6:%.*]] = bitcast i64 [[X2:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP1]], <64 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <64 x i8> [[TMP4]], [[X1:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP10]], <64 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP4]], <64 x i8> [[X1]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP11]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) + ret <64 x i8> %res +} + +declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64) + +define i64 @test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_ptestm_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = and <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = and <64 x i8> [[X0:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <64 x i8> [[TMP1]], [[X1:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or <64 x i8> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = or <64 x i8> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and <64 x i8> [[X0]], [[X1]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <64 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <64 x i8> [[TMP11]], zeroinitializer +; 
CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP11]], splat (i8 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i8> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <64 x i8> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <64 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <64 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[X2:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = and <64 x i1> [[_MSPROP_ICMP]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <64 x i1> [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = and <64 x i1> [[_MSPROP_ICMP]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = or <64 x i1> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <64 x i1> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <64 x i1> [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <64 x i1> [[TMP23]] to i64 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <64 x i1> [[TMP24]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = and <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP28:%.*]] = and <64 x i8> [[X0]], [[TMP2]] +; CHECK-NEXT: [[TMP29:%.*]] = and <64 x i8> [[TMP1]], [[X1]] +; CHECK-NEXT: [[TMP30:%.*]] = or <64 x i8> [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = or <64 x i8> [[TMP30]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = and <64 x i8> [[X0]], [[X1]] +; CHECK-NEXT: [[TMP33:%.*]] = xor <64 x i8> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = or <64 x i8> [[TMP31]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <64 x i8> [[TMP34]], zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = xor <64 x i8> [[TMP34]], splat (i8 -1) +; CHECK-NEXT: [[TMP37:%.*]] = and <64 x i8> [[TMP36]], [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp eq <64 x i8> [[TMP37]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <64 x i1> [[TMP35]], [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = icmp ne <64 x i8> [[TMP32]], zeroinitializer +; 
CHECK-NEXT: [[TMP40:%.*]] = bitcast <64 x i1> [[_MSPROP_ICMP1]] to i64 +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <64 x i1> [[TMP39]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP25]], [[TMP40]] +; CHECK-NEXT: [[RES2:%.*]] = add i64 [[TMP26]], [[TMP41]] +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[RES2]] +; + %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) + %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) + %res2 = add i64 %res, %res1 + ret i64 %res2 +} + +declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32) + +define i32 @test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_ptestm_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = and <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = and <32 x i16> [[X0:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <32 x i16> [[TMP1]], [[X1:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or <32 x i16> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and <32 x i16> [[X0]], [[X1]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], splat (i16 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i16> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <32 x i16> 
[[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <32 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = and <32 x i1> [[_MSPROP_ICMP]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <32 x i1> [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = and <32 x i1> [[_MSPROP_ICMP]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <32 x i1> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <32 x i1> [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <32 x i1> [[TMP23]] to i32 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <32 x i1> [[TMP24]] to i32 +; CHECK-NEXT: [[TMP27:%.*]] = and <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP28:%.*]] = and <32 x i16> [[X0]], [[TMP2]] +; CHECK-NEXT: [[TMP29:%.*]] = and <32 x i16> [[TMP1]], [[X1]] +; CHECK-NEXT: [[TMP30:%.*]] = or <32 x i16> [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = or <32 x i16> [[TMP30]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = and <32 x i16> [[X0]], [[X1]] +; CHECK-NEXT: [[TMP33:%.*]] = xor <32 x i16> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = or <32 x i16> [[TMP31]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <32 x i16> [[TMP34]], zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = xor <32 x i16> [[TMP34]], splat (i16 -1) +; CHECK-NEXT: [[TMP37:%.*]] = and <32 x i16> [[TMP36]], [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp eq <32 x i16> [[TMP37]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <32 x i1> [[TMP35]], [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = icmp ne <32 x i16> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <32 x i1> [[_MSPROP_ICMP1]] to i32 +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <32 x i1> [[TMP39]] to i32 +; CHECK-NEXT: [[_MSPROP:%.*]] = or 
i32 [[TMP25]], [[TMP40]] +; CHECK-NEXT: [[RES2:%.*]] = add i32 [[TMP26]], [[TMP41]] +; CHECK-NEXT: store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES2]] +; + %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) + %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) + %res2 = add i32 %res, %res1 + ret i32 %res2 +} + +declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2) + +define i64 @test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_ptestnm_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = and <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = and <64 x i8> [[X0:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <64 x i8> [[TMP1]], [[X1:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or <64 x i8> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = or <64 x i8> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and <64 x i8> [[X0]], [[X1]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <64 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <64 x i8> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP11]], splat (i8 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i8> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <64 x i8> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <64 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <64 x i8> [[TMP9]], zeroinitializer +; 
CHECK-NEXT: [[TMP17:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[X2:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = and <64 x i1> [[_MSPROP_ICMP]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <64 x i1> [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = and <64 x i1> [[_MSPROP_ICMP]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = or <64 x i1> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <64 x i1> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <64 x i1> [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <64 x i1> [[TMP23]] to i64 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <64 x i1> [[TMP24]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = and <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP28:%.*]] = and <64 x i8> [[X0]], [[TMP2]] +; CHECK-NEXT: [[TMP29:%.*]] = and <64 x i8> [[TMP1]], [[X1]] +; CHECK-NEXT: [[TMP30:%.*]] = or <64 x i8> [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = or <64 x i8> [[TMP30]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = and <64 x i8> [[X0]], [[X1]] +; CHECK-NEXT: [[TMP33:%.*]] = xor <64 x i8> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = or <64 x i8> [[TMP31]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <64 x i8> [[TMP34]], zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = xor <64 x i8> [[TMP34]], splat (i8 -1) +; CHECK-NEXT: [[TMP37:%.*]] = and <64 x i8> [[TMP36]], [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp eq <64 x i8> [[TMP37]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <64 x i1> [[TMP35]], [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = icmp eq <64 x i8> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <64 x i1> [[_MSPROP_ICMP1]] to i64 +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <64 x i1> [[TMP39]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP25]], [[TMP40]] +; CHECK-NEXT: [[RES2:%.*]] = add i64 [[TMP26]], [[TMP41]] +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[RES2]] +; + %res 
= call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) + %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) + %res2 = add i64 %res, %res1 + ret i64 %res2 +} + +declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2) + +define i32 @test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_ptestnm_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = and <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = and <32 x i16> [[X0:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <32 x i16> [[TMP1]], [[X1:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or <32 x i16> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and <32 x i16> [[X0]], [[X1]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], splat (i16 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i16> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <32 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = and <32 x i1> 
[[_MSPROP_ICMP]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <32 x i1> [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = and <32 x i1> [[_MSPROP_ICMP]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <32 x i1> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <32 x i1> [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <32 x i1> [[TMP23]] to i32 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <32 x i1> [[TMP24]] to i32 +; CHECK-NEXT: [[TMP27:%.*]] = and <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP28:%.*]] = and <32 x i16> [[X0]], [[TMP2]] +; CHECK-NEXT: [[TMP29:%.*]] = and <32 x i16> [[TMP1]], [[X1]] +; CHECK-NEXT: [[TMP30:%.*]] = or <32 x i16> [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = or <32 x i16> [[TMP30]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = and <32 x i16> [[X0]], [[X1]] +; CHECK-NEXT: [[TMP33:%.*]] = xor <32 x i16> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = or <32 x i16> [[TMP31]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <32 x i16> [[TMP34]], zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = xor <32 x i16> [[TMP34]], splat (i16 -1) +; CHECK-NEXT: [[TMP37:%.*]] = and <32 x i16> [[TMP36]], [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp eq <32 x i16> [[TMP37]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <32 x i1> [[TMP35]], [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = icmp eq <32 x i16> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <32 x i1> [[_MSPROP_ICMP1]] to i32 +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <32 x i1> [[TMP39]] to i32 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP25]], [[TMP40]] +; CHECK-NEXT: [[RES2:%.*]] = add i32 [[TMP26]], [[TMP41]] +; CHECK-NEXT: store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES2]] +; + %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) + %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> 
%x1, i32-1) + %res2 = add i32 %res, %res1 + ret i32 %res2 +} + +declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>) + +define i64 @test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_cvtb2mask_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = xor <64 x i8> [[X0:%.*]], splat (i8 -128) +; CHECK-NEXT: [[TMP3:%.*]] = xor <64 x i8> [[TMP1]], splat (i8 -1) +; CHECK-NEXT: [[TMP4:%.*]] = and <64 x i8> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = or <64 x i8> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <64 x i8> [[TMP4]], splat (i8 -128) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <64 x i8> [[TMP5]], splat (i8 -128) +; CHECK-NEXT: [[TMP8:%.*]] = xor <64 x i1> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <64 x i8> [[X0]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <64 x i1> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <64 x i1> [[TMP9]] to i64 +; CHECK-NEXT: store i64 [[TMP10]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP11]] +; + %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0) + ret i64 %res +} + +declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>) + +define i32 @test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_cvtw2mask_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = xor <32 x i16> [[X0:%.*]], splat (i16 -32768) +; CHECK-NEXT: [[TMP3:%.*]] = xor <32 x i16> [[TMP1]], splat (i16 -1) +; CHECK-NEXT: [[TMP4:%.*]] = and <32 x i16> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <32 x i16> [[TMP4]], splat (i16 -32768) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <32 x i16> [[TMP5]], splat (i16 -32768) +; CHECK-NEXT: 
[[TMP8:%.*]] = xor <32 x i1> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <32 x i16> [[X0]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i1> [[TMP8]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i1> [[TMP9]] to i32 +; CHECK-NEXT: store i32 [[TMP10]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP11]] +; + %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0) + ret i32 %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmulhu_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, 
ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmulh_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + 
ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmulhr_sw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: 
[[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmaddubs_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; 
CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP7]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaddubs_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[X2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; 
CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16) + +define <16 x i32> @test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmaddw_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP7]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> 
@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaddw_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[X2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[X2]] +; 
CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) + ret <16 x i32> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_permvar_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: 
[[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_maskz_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: 
[[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i16 [[TMP4]], 31 +; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = and i16 [[TMP7]], 31 +; CHECK-NEXT: [[TMP9:%.*]] = or i16 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 31 +; CHECK-NEXT: [[TMP12:%.*]] = or 
i16 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP14:%.*]] = and i16 [[TMP13]], 31 +; CHECK-NEXT: [[TMP15:%.*]] = or i16 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 31 +; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP20:%.*]] = and i16 [[TMP19]], 31 +; CHECK-NEXT: [[TMP21:%.*]] = or i16 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 31 +; CHECK-NEXT: [[TMP24:%.*]] = or i16 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP26:%.*]] = and i16 [[TMP25]], 31 +; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 31 +; CHECK-NEXT: [[TMP30:%.*]] = or i16 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP32:%.*]] = and i16 [[TMP31]], 31 +; CHECK-NEXT: [[TMP33:%.*]] = or i16 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 31 +; CHECK-NEXT: [[TMP36:%.*]] = or i16 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP38:%.*]] = and i16 [[TMP37]], 31 +; CHECK-NEXT: [[TMP39:%.*]] = or i16 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 31 +; CHECK-NEXT: [[TMP42:%.*]] = or i16 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP44:%.*]] = and i16 
[[TMP43]], 31 +; CHECK-NEXT: [[TMP45:%.*]] = or i16 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP47:%.*]] = and i16 [[TMP46]], 31 +; CHECK-NEXT: [[TMP48:%.*]] = or i16 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP50:%.*]] = and i16 [[TMP49]], 31 +; CHECK-NEXT: [[TMP51:%.*]] = or i16 [[TMP49]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16 +; CHECK-NEXT: [[TMP53:%.*]] = and i16 [[TMP52]], 31 +; CHECK-NEXT: [[TMP54:%.*]] = or i16 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17 +; CHECK-NEXT: [[TMP56:%.*]] = and i16 [[TMP55]], 31 +; CHECK-NEXT: [[TMP57:%.*]] = or i16 [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18 +; CHECK-NEXT: [[TMP59:%.*]] = and i16 [[TMP58]], 31 +; CHECK-NEXT: [[TMP60:%.*]] = or i16 [[TMP58]], [[TMP59]] +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19 +; CHECK-NEXT: [[TMP62:%.*]] = and i16 [[TMP61]], 31 +; CHECK-NEXT: [[TMP63:%.*]] = or i16 [[TMP61]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20 +; CHECK-NEXT: [[TMP65:%.*]] = and i16 [[TMP64]], 31 +; CHECK-NEXT: [[TMP66:%.*]] = or i16 [[TMP64]], [[TMP65]] +; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21 +; CHECK-NEXT: [[TMP68:%.*]] = and i16 [[TMP67]], 31 +; CHECK-NEXT: [[TMP69:%.*]] = or i16 [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22 +; CHECK-NEXT: [[TMP71:%.*]] = and i16 [[TMP70]], 31 +; CHECK-NEXT: [[TMP72:%.*]] = or i16 [[TMP70]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23 +; CHECK-NEXT: [[TMP74:%.*]] = and i16 [[TMP73]], 31 +; CHECK-NEXT: [[TMP75:%.*]] = or i16 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i16> [[TMP3]], 
i64 24 +; CHECK-NEXT: [[TMP77:%.*]] = and i16 [[TMP76]], 31 +; CHECK-NEXT: [[TMP78:%.*]] = or i16 [[TMP76]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25 +; CHECK-NEXT: [[TMP80:%.*]] = and i16 [[TMP79]], 31 +; CHECK-NEXT: [[TMP81:%.*]] = or i16 [[TMP79]], [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26 +; CHECK-NEXT: [[TMP83:%.*]] = and i16 [[TMP82]], 31 +; CHECK-NEXT: [[TMP84:%.*]] = or i16 [[TMP82]], [[TMP83]] +; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27 +; CHECK-NEXT: [[TMP86:%.*]] = and i16 [[TMP85]], 31 +; CHECK-NEXT: [[TMP87:%.*]] = or i16 [[TMP85]], [[TMP86]] +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28 +; CHECK-NEXT: [[TMP89:%.*]] = and i16 [[TMP88]], 31 +; CHECK-NEXT: [[TMP90:%.*]] = or i16 [[TMP88]], [[TMP89]] +; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29 +; CHECK-NEXT: [[TMP92:%.*]] = and i16 [[TMP91]], 31 +; CHECK-NEXT: [[TMP93:%.*]] = or i16 [[TMP91]], [[TMP92]] +; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30 +; CHECK-NEXT: [[TMP95:%.*]] = and i16 [[TMP94]], 31 +; CHECK-NEXT: [[TMP96:%.*]] = or i16 [[TMP94]], [[TMP95]] +; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31 +; CHECK-NEXT: [[TMP98:%.*]] = and i16 [[TMP97]], 31 +; CHECK-NEXT: [[TMP99:%.*]] = or i16 [[TMP97]], [[TMP98]] +; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP99]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP101:%.*]], label [[TMP102:%.*]], !prof [[PROF1]] +; CHECK: 101: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 102: +; CHECK-NEXT: [[TMP103:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]]) +; 
CHECK-NEXT: store <32 x i16> [[TMP100]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP103]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31 +; CHECK-NEXT: [[TMP7:%.*]] = or i16 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31 +; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31 +; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31 +; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5 +; CHECK-NEXT: 
[[TMP21:%.*]] = and i16 [[TMP20]], 31 +; CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31 +; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31 +; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31 +; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31 +; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31 +; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31 +; CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31 +; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31 +; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31 +; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31 +; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP53:%.*]] = extractelement 
<32 x i16> [[TMP3]], i64 16 +; CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31 +; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17 +; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31 +; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18 +; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31 +; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19 +; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31 +; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20 +; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31 +; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21 +; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31 +; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]] +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22 +; CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31 +; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23 +; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31 +; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP74]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24 +; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31 +; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]] +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25 +; CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31 +; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26 +; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31 +; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]] 
+; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27 +; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31 +; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28 +; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31 +; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]] +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29 +; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31 +; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30 +; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31 +; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]] +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31 +; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31 +; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]] +; CHECK: 102: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 103: +; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]]) +; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1> +; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP107:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> [[TMP101]], <32 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP108:%.*]] = xor <32 x i16> [[TMP104]], [[X1]] +; CHECK-NEXT: [[TMP109:%.*]] = or <32 x i16> [[TMP108]], [[TMP101]] +; CHECK-NEXT: [[TMP110:%.*]] = or <32 x i16> [[TMP109]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> 
[[TMP105]], <32 x i16> [[TMP110]], <32 x i16> [[TMP107]] +; CHECK-NEXT: [[TMP111:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> [[TMP104]], <32 x i16> [[X1]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP111]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31 +; CHECK-NEXT: [[TMP7:%.*]] = or i16 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31 +; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31 +; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], 
[[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31 +; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31 +; CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31 +; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31 +; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31 +; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31 +; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31 +; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31 +; CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31 +; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31 +; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31 +; 
CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31 +; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16 +; CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31 +; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17 +; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31 +; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18 +; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31 +; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19 +; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31 +; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20 +; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31 +; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21 +; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31 +; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]] +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22 +; CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31 +; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23 +; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31 +; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP74]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24 +; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31 +; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]] +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25 +; 
CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31 +; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26 +; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31 +; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]] +; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27 +; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31 +; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28 +; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31 +; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]] +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29 +; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31 +; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30 +; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31 +; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]] +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31 +; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31 +; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]] +; CHECK: 102: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 103: +; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]]) +; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1> +; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP107:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> 
[[TMP101]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP108:%.*]] = xor <32 x i16> [[TMP104]], zeroinitializer +; CHECK-NEXT: [[TMP109:%.*]] = or <32 x i16> [[TMP108]], [[TMP101]] +; CHECK-NEXT: [[TMP110:%.*]] = or <32 x i16> [[TMP109]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP105]], <32 x i16> [[TMP110]], <32 x i16> [[TMP107]] +; CHECK-NEXT: [[TMP111:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> [[TMP104]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP111]] +; + %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i16 [[TMP4]], 31 +; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = and i16 [[TMP7]], 31 +; CHECK-NEXT: [[TMP9:%.*]] = or i16 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 31 +; CHECK-NEXT: [[TMP12:%.*]] = or i16 [[TMP10]], [[TMP11]] +; CHECK-NEXT: 
[[TMP13:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP14:%.*]] = and i16 [[TMP13]], 31 +; CHECK-NEXT: [[TMP15:%.*]] = or i16 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 31 +; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP20:%.*]] = and i16 [[TMP19]], 31 +; CHECK-NEXT: [[TMP21:%.*]] = or i16 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 31 +; CHECK-NEXT: [[TMP24:%.*]] = or i16 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP26:%.*]] = and i16 [[TMP25]], 31 +; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 31 +; CHECK-NEXT: [[TMP30:%.*]] = or i16 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP32:%.*]] = and i16 [[TMP31]], 31 +; CHECK-NEXT: [[TMP33:%.*]] = or i16 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 31 +; CHECK-NEXT: [[TMP36:%.*]] = or i16 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP38:%.*]] = and i16 [[TMP37]], 31 +; CHECK-NEXT: [[TMP39:%.*]] = or i16 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 31 +; CHECK-NEXT: [[TMP42:%.*]] = or i16 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP44:%.*]] = and i16 [[TMP43]], 31 +; CHECK-NEXT: [[TMP45:%.*]] = or 
i16 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP47:%.*]] = and i16 [[TMP46]], 31 +; CHECK-NEXT: [[TMP48:%.*]] = or i16 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP50:%.*]] = and i16 [[TMP49]], 31 +; CHECK-NEXT: [[TMP51:%.*]] = or i16 [[TMP49]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16 +; CHECK-NEXT: [[TMP53:%.*]] = and i16 [[TMP52]], 31 +; CHECK-NEXT: [[TMP54:%.*]] = or i16 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17 +; CHECK-NEXT: [[TMP56:%.*]] = and i16 [[TMP55]], 31 +; CHECK-NEXT: [[TMP57:%.*]] = or i16 [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18 +; CHECK-NEXT: [[TMP59:%.*]] = and i16 [[TMP58]], 31 +; CHECK-NEXT: [[TMP60:%.*]] = or i16 [[TMP58]], [[TMP59]] +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19 +; CHECK-NEXT: [[TMP62:%.*]] = and i16 [[TMP61]], 31 +; CHECK-NEXT: [[TMP63:%.*]] = or i16 [[TMP61]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20 +; CHECK-NEXT: [[TMP65:%.*]] = and i16 [[TMP64]], 31 +; CHECK-NEXT: [[TMP66:%.*]] = or i16 [[TMP64]], [[TMP65]] +; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21 +; CHECK-NEXT: [[TMP68:%.*]] = and i16 [[TMP67]], 31 +; CHECK-NEXT: [[TMP69:%.*]] = or i16 [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22 +; CHECK-NEXT: [[TMP71:%.*]] = and i16 [[TMP70]], 31 +; CHECK-NEXT: [[TMP72:%.*]] = or i16 [[TMP70]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23 +; CHECK-NEXT: [[TMP74:%.*]] = and i16 [[TMP73]], 31 +; CHECK-NEXT: [[TMP75:%.*]] = or i16 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24 +; CHECK-NEXT: [[TMP77:%.*]] = and i16 
[[TMP76]], 31 +; CHECK-NEXT: [[TMP78:%.*]] = or i16 [[TMP76]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25 +; CHECK-NEXT: [[TMP80:%.*]] = and i16 [[TMP79]], 31 +; CHECK-NEXT: [[TMP81:%.*]] = or i16 [[TMP79]], [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26 +; CHECK-NEXT: [[TMP83:%.*]] = and i16 [[TMP82]], 31 +; CHECK-NEXT: [[TMP84:%.*]] = or i16 [[TMP82]], [[TMP83]] +; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27 +; CHECK-NEXT: [[TMP86:%.*]] = and i16 [[TMP85]], 31 +; CHECK-NEXT: [[TMP87:%.*]] = or i16 [[TMP85]], [[TMP86]] +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28 +; CHECK-NEXT: [[TMP89:%.*]] = and i16 [[TMP88]], 31 +; CHECK-NEXT: [[TMP90:%.*]] = or i16 [[TMP88]], [[TMP89]] +; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29 +; CHECK-NEXT: [[TMP92:%.*]] = and i16 [[TMP91]], 31 +; CHECK-NEXT: [[TMP93:%.*]] = or i16 [[TMP91]], [[TMP92]] +; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30 +; CHECK-NEXT: [[TMP95:%.*]] = and i16 [[TMP94]], 31 +; CHECK-NEXT: [[TMP96:%.*]] = or i16 [[TMP94]], [[TMP95]] +; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31 +; CHECK-NEXT: [[TMP98:%.*]] = and i16 [[TMP97]], 31 +; CHECK-NEXT: [[TMP99:%.*]] = or i16 [[TMP97]], [[TMP98]] +; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1:%.*]], <32 x i16> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP99]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP101:%.*]], label [[TMP102:%.*]], !prof [[PROF1]] +; CHECK: 101: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 102: +; CHECK-NEXT: [[TMP103:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1]], <32 x i16> [[X2:%.*]]) +; CHECK-NEXT: store <32 x i16> [[TMP100]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP103]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31 +; CHECK-NEXT: [[TMP7:%.*]] = or i16 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31 +; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31 +; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31 +; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31 +; 
CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31 +; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31 +; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31 +; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31 +; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31 +; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31 +; CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31 +; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31 +; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31 +; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31 +; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16 +; 
CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31 +; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17 +; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31 +; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18 +; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31 +; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19 +; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31 +; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20 +; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31 +; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21 +; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31 +; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]] +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22 +; CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31 +; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23 +; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31 +; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP74]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24 +; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31 +; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]] +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25 +; CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31 +; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26 +; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31 +; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]] +; CHECK-NEXT: [[TMP86:%.*]] = 
extractelement <32 x i16> [[TMP3]], i64 27 +; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31 +; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28 +; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31 +; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]] +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29 +; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31 +; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30 +; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31 +; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]] +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31 +; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31 +; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1:%.*]], <32 x i16> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]] +; CHECK: 102: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 103: +; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1]], <32 x i16> [[X2:%.*]]) +; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1> +; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP107:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> [[TMP101]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP108:%.*]] = xor <32 x i16> [[TMP104]], [[X1]] +; CHECK-NEXT: [[TMP109:%.*]] = or <32 x i16> [[TMP108]], [[TMP101]] +; CHECK-NEXT: [[TMP110:%.*]] = or <32 x i16> [[TMP109]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP105]], <32 x i16> [[TMP110]], 
<32 x i16> [[TMP107]] +; CHECK-NEXT: [[TMP111:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> [[TMP104]], <32 x i16> [[X1]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP111]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32) + +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_dbpsadbw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]], i32 2) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 
[[X4:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[X3:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[X3]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP17]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP18]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] +; CHECK: 19: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 20: +; CHECK-NEXT: [[TMP21:%.*]] = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> [[X0]], <64 x i8> [[X1]], i32 3) +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32 [[X4]] to <32 x i1> +; CHECK-NEXT: [[TMP24:%.*]] = select <32 x i1> [[TMP23]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = xor <32 x i16> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = or <32 x i16> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = or <32 x i16> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP22]], <32 x i16> [[TMP27]], <32 x i16> [[TMP24]] +; CHECK-NEXT: [[TMP28:%.*]] = select <32 x i1> [[TMP23]], <32 x i16> [[TMP21]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP29]], 0 +; 
CHECK-NEXT: [[TMP30:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP30]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: br i1 [[_MSOR8]], label [[TMP31:%.*]], label [[TMP32:%.*]], !prof [[PROF1]] +; CHECK: 31: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 32: +; CHECK-NEXT: [[TMP33:%.*]] = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> [[X0]], <64 x i8> [[X1]], i32 4) +; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } { <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1) }, <32 x i16> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> [[TMP16]], 0 +; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP34]], <32 x i16> [[_MSPROP_SELECT1]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES3]], <32 x i16> [[TMP28]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP35]], <32 x i16> zeroinitializer, 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES4]], <32 x i16> [[TMP33]], 2 +; CHECK-NEXT: store { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP36]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <32 x i16>, <32 x i16>, <32 x i16> } [[RES5]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 3, <32 x i16> zeroinitializer, i32 %x4) + %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4, <32 x i16> %x3, i32 -1) + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 
x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 +} + +define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu16_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu16_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: 
[[TMP7:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu16_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> 
[[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu16_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP8]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> 
@test_mask_adds_epu16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu16_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <32 x i16> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP16]], <32 x i16> [[TMP13]] +; 
CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP10]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP17]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu16_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x 
i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epu16_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epu16_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epu16_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call 
void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epu16_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; 
CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP8]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epu16_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <32 
x i16> @llvm.usub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <32 x i16> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP16]], <32 x i16> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP10]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP17]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epu16_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] 
= xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <64 x i8> @test_mask_adds_epu8_rr_512(<64 x i8> %a, <64 x i8> %b) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu8_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; 
CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_adds_epu8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu8_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> 
%passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_adds_epu8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu8_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <64 x i8> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP10]], <64 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP4]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP11]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_adds_epu8_rm_512(<64 x i8> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu8_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x 
i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP8]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_adds_epu8_rmk_512(<64 x i8> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu8_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: 
call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <64 x i1> [[TMP12]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <64 x i8> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i8> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP16]], <64 x i8> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP12]], <64 x i8> [[TMP10]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP17]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_adds_epu8_rmkz_512(<64 x i8> %a, ptr %ptr_b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epu8_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to 
i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP16]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @test_mask_subs_epu8_rr_512(<64 x i8> %a, <64 x i8> %b) nounwind #0 { +; 
CHECK-LABEL: @test_mask_subs_epu8_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_subs_epu8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epu8_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: 
[[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_subs_epu8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epu8_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <64 x i8> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP10]], <64 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP4]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret 
<64 x i8> [[TMP11]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_subs_epu8_rm_512(<64 x i8> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epu8_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP8]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_subs_epu8_rmk_512(<64 x i8> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epu8_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; 
CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <64 x i1> [[TMP12]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <64 x i8> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i8> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP16]], <64 x i8> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP12]], <64 x i8> [[TMP10]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP17]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) + 
ret <64 x i8> %res +} + +define <64 x i8> @test_mask_subs_epu8_rmkz_512(<64 x i8> %a, ptr %ptr_b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epu8_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 
x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP16]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <32 x i16> @test_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind #0 { +; CHECK-LABEL: @test_adds_epi16_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) + ret <32 x i16> %1 +} + +define <32 x i16> @test_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_adds_epi16_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> 
@llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 +} + +define <32 x i16> @test_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_adds_epi16_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> 
zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +define <32 x i16> @test_adds_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_adds_epi16_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 
+; CHECK-NEXT: ret <32 x i16> [[TMP8]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) + ret <32 x i16> %1 +} + +define <32 x i16> @test_adds_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_adds_epi16_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <32 x i16> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> 
[[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP16]], <32 x i16> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP10]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP17]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 +} + +define <32 x i16> @test_adds_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_adds_epi16_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP10:%.*]] = 
bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +declare <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16>, <32 x i16>) + +define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epi16_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x 
i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epi16_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epi16_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; 
CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epi16_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: 
call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP8]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epi16_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: 
[[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <32 x i16> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP16]], <32 x i16> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP10]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP17]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epi16_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof 
[[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind #0 { +; CHECK-LABEL: @test_subs_epi16_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 
ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) + ret <32 x i16> %1 +} + +define <32 x i16> @test_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_subs_epi16_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x 
i16> [[TMP5]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 +} + +define <32 x i16> @test_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_subs_epi16_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i32 %mask to <32 x 
i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +define <32 x i16> @test_subs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_subs_epi16_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP8]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) + ret <32 x i16> %1 +} + +define <32 x i16> @test_subs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_subs_epi16_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), 
align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <32 x i16> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP16]], <32 x i16> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP10]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP17]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 +} + +define <32 x 
i16> @test_subs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_subs_epi16_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], 
ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +declare <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16>, <32 x i16>) + +define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi16_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi16_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x 
i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi16_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], 
<32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi16_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP8]] 
+; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi16_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <32 x i16> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 
x i16> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP16]], <32 x i16> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP10]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP17]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi16_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: 
[[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %b = load <32 x i16>, ptr %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <64 x i8> @test_mask_adds_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epi8_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_adds_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: 
@test_mask_adds_epi8_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_adds_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epi8_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to 
ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <64 x i8> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP10]], <64 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP4]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP11]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_adds_epi8_rm_512(<64 x i8> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epi8_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], 
align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP8]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_adds_epi8_rmk_512(<64 x i8> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epi8_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP9]], 
align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <64 x i1> [[TMP12]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <64 x i8> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i8> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP16]], <64 x i8> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP12]], <64 x i8> [[TMP10]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP17]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_adds_epi8_rmkz_512(<64 x i8> %a, ptr %ptr_b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_adds_epi8_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], align 64 
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP16]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @test_mask_subs_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi8_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> 
@llvm.ssub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_subs_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi8_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %res = call <64 x i8> 
@llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_subs_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi8_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <64 x i8> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP10]], <64 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP4]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP11]] +; + %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_subs_epi8_rm_512(<64 x i8> %a, ptr %ptr_b) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi8_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to 
i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP8]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_subs_epi8_rmk_512(<64 x i8> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi8_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label 
[[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <64 x i1> [[TMP12]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <64 x i8> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i8> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP16]], <64 x i8> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP12]], <64 x i8> [[TMP10]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP17]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_subs_epi8_rmkz_512(<64 x i8> %a, ptr %ptr_b, i64 %mask) nounwind #0 { +; CHECK-LABEL: @test_mask_subs_epi8_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = 
load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <64 x i8>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <64 x i8>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> [[A:%.*]], <64 x i8> [[B]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP16]] +; + %b = load <64 x i8>, ptr %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +declare <32 x i16> 
@llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_psrlv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psrlv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: 
[[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_maskz_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_psrlv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select 
<32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_psrav32_hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psrav32_hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_maskz_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_psrav32_hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: 
[[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @test_int_x86_avx512_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_psllv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 
%x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psllv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + ret <32 x i16> %res +} + +define <32 x i16> @test_int_x86_avx512_maskz_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_psllv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) + ret <32 x i16> %res +} + +declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32) + +define <32 x i8> @test_int_x86_avx512_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmov_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i8> +; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> 
[[TMP2]] +; + %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[_MSPROP]], <32 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i8> [[TMP4]], [[X1:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i8> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i8> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP10]], <32 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP4]], <32 x i8> [[X1]] +; CHECK-NEXT: store <32 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[TMP11]] +; + %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) + ret <32 x i8> %res +} + +define <32 x i8> @test_int_x86_avx512_maskz_pmov_wb_512(<32 x i16> %x0, i32 %x2) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pmov_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 
add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[_MSPROP]], <32 x i8> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i8> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> [[TMP9]], <32 x i8> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP3]], <32 x i8> zeroinitializer +; CHECK-NEXT: store <32 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[TMP10]] +; + %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) + ret <32 x i8> %res +} + +attributes #0 = { sanitize_memory } diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll new file mode 100644 index 0000000000000..6f2a6ccbfa702 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll @@ -0,0 +1,3687 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s +; +; Forked from llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +; +; Strictly handled: +; - llvm.x86.avx512.dbpsadbw.512 +; - llvm.x86.avx512.ktestc.d, llvm.x86.avx512.ktestc.q, llvm.x86.avx512.ktestz.d, llvm.x86.avx512.ktestz.q +; - llvm.x86.avx512.mask.pmovs.wb.512, llvm.x86.avx512.mask.pmovs.wb.mem.512 +; - 
llvm.x86.avx512.mask.pmovus.wb.512, llvm.x86.avx512.mask.pmovus.wb.mem.512 +; - llvm.x86.avx512.mask.pmov.wb.mem.512 +; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512 +; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512 +; - llvm.x86.avx512.pmaddubs.w.512, llvm.x86.avx512.pmaddw.d.512 +; - llvm.x86.avx512.psad.bw.512 +; +; Heuristically handled: +; - llvm.x86.avx512.kadd.d, llvm.x86.avx512.kadd.q +; - llvm.x86.avx512.pavg.b.512, llvm.x86.avx512.pavg.w.512 +; - llvm.x86.avx512.permvar.hi.512 +; - llvm.x86.avx512.pmul.hr.sw.512, llvm.x86.avx512.pmulhu.w.512, llvm.x86.avx512.pmulh.w.512 +; - llvm.x86.avx512.pshuf.b.512 +; - llvm.x86.avx512.psllv.w.512 +; - llvm.x86.avx512.psrav.w.512, llvm.x86.avx512.psrlv.w.512 + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @test_int_x86_avx512_kadd_d(<32 x i16> %A, <32 x i16> %B) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_kadd_d( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = xor <32 x i16> [[A:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i16> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = xor <32 x i16> [[TMP3]], splat (i16 -1) +; CHECK-NEXT: [[TMP6:%.*]] = and <32 x i16> [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <32 x i16> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <32 x i1> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[A]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[B:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP1]], 
zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP10]], splat (i16 -1) +; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i16> [[TMP12]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <32 x i1> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <32 x i16> [[B]], zeroinitializer +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i1> [[_MSPROP_ICMP]], [[_MSPROP_ICMP1]] +; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1> [[TMP8]], <32 x i1> [[TMP15]]) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <32 x i1> [[_MSPROP]] to i32 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i1> [[TMP16]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = xor i32 [[TMP18]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP17]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = xor i32 [[TMP20]], -1 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], [[TMP19]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP23]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP2:%.*]] = and i1 [[TMP21]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP18]], 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = zext i1 [[_MSPROP_ICMP2]] to i32 +; CHECK-NEXT: [[TMP26:%.*]] = zext i1 [[TMP25]] to i32 +; CHECK-NEXT: store i32 [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP26]] +; +entry: + %0 = icmp ne <32 x i16> %A, zeroinitializer + %1 = icmp ne <32 x i16> %B, zeroinitializer + %2 = call <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1> %0, <32 x i1> %1) + %3 = bitcast <32 x i1> %2 to i32 + %4 = icmp eq i32 %3, 0 + %5 = zext i1 %4 to i32 + ret i32 %5 +} +declare <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1>, <32 x i1>) + +define i32 @test_int_x86_avx512_kadd_q(<64 x i8> %A, <64 x i8> %B) nounwind #0 { +; CHECK-LABEL: @test_int_x86_avx512_kadd_q( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <64 x i8>, 
ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = xor <64 x i8> [[A:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = or <64 x i8> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <64 x i8> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = xor <64 x i8> [[TMP3]], splat (i8 -1) +; CHECK-NEXT: [[TMP6:%.*]] = and <64 x i8> [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <64 x i8> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <64 x i1> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <64 x i8> [[A]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[B:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <64 x i8> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP10]], splat (i8 -1) +; CHECK-NEXT: [[TMP13:%.*]] = and <64 x i8> [[TMP12]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <64 x i1> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <64 x i8> [[B]], zeroinitializer +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i1> [[_MSPROP_ICMP]], [[_MSPROP_ICMP1]] +; CHECK-NEXT: [[TMP16:%.*]] = call <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1> [[TMP8]], <64 x i1> [[TMP15]]) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <64 x i1> [[_MSPROP]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i1> [[TMP16]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP17]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i64 [[TMP20]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP20]], -1 +; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP22]], [[TMP19]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 
[[TMP23]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP2:%.*]] = and i1 [[TMP21]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[TMP18]], 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = zext i1 [[_MSPROP_ICMP2]] to i32 +; CHECK-NEXT: [[TMP26:%.*]] = zext i1 [[TMP25]] to i32 +; CHECK-NEXT: store i32 [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP26]] +; +entry: + %0 = icmp ne <64 x i8> %A, zeroinitializer + %1 = icmp ne <64 x i8> %B, zeroinitializer + %2 = call <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1> %0, <64 x i1> %1) + %3 = bitcast <64 x i1> %2 to i64 + %4 = icmp eq i64 %3, 0 + %5 = zext i1 %4 to i32 + ret i32 %5 +} +declare <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1>, <64 x i1>) + +define i32 @test_x86_avx512_ktestc_d(<32 x i16> %A, <32 x i16> %B) #0 { +; CHECK-LABEL: @test_x86_avx512_ktestc_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <32 x i16> [[A:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = or <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <32 x i16> [[TMP4]], splat (i16 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i16> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <32 x i16> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <32 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[A]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <32 x i16> [[B:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], splat (i16 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i16> 
[[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <32 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <32 x i16> [[B]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <32 x i1> [[_MSPROP_ICMP]] to i32 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP17]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i1> [[_MSPROP_ICMP1]] to i32 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP18]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: 19: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 20: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> [[TMP9]], <32 x i1> [[TMP16]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %1 = icmp ne <32 x i16> %A, zeroinitializer + %2 = icmp ne <32 x i16> %B, zeroinitializer + %res = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> %1, <32 x i1> %2) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx512.ktestc.d(<32 x i1>, <32 x i1>) nounwind readnone + +define i32 @test_x86_avx512_ktestz_d(<32 x i16> %A, <32 x i16> %B) #0 { +; CHECK-LABEL: @test_x86_avx512_ktestz_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <32 x i16> [[A:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = or <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <32 x i16> [[TMP4]], splat (i16 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i16> [[TMP6]], [[TMP3]] +; 
CHECK-NEXT: [[TMP8:%.*]] = icmp eq <32 x i16> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <32 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[A]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <32 x i16> [[B:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], splat (i16 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i16> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <32 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <32 x i16> [[B]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <32 x i1> [[_MSPROP_ICMP]] to i32 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP17]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i1> [[_MSPROP_ICMP1]] to i32 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP18]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] +; CHECK: 19: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 20: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> [[TMP9]], <32 x i1> [[TMP16]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %1 = icmp ne <32 x i16> %A, zeroinitializer + %2 = icmp ne <32 x i16> %B, zeroinitializer + %res = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> %1, <32 x i1> %2) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx512.ktestz.d(<32 x i1>, <32 x i1>) nounwind readnone + +define i32 @test_x86_avx512_ktestc_q(<64 x i8> %A, <64 x i8> %B) #0 { +; CHECK-LABEL: @test_x86_avx512_ktestc_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; 
CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <64 x i8> [[A:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = or <64 x i8> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <64 x i8> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <64 x i8> [[TMP4]], splat (i8 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <64 x i8> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <64 x i8> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <64 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <64 x i8> [[A]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <64 x i8> [[B:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <64 x i8> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP11]], splat (i8 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i8> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <64 x i8> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <64 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <64 x i8> [[B]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <64 x i1> [[_MSPROP_ICMP]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP17]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i1> [[_MSPROP_ICMP1]] to i64 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP18]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] +; CHECK: 19: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 20: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> [[TMP9]], <64 x i1> [[TMP16]]) +; CHECK-NEXT: store i32 0, ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %1 = icmp ne <64 x i8> %A, zeroinitializer + %2 = icmp ne <64 x i8> %B, zeroinitializer + %res = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> %1, <64 x i1> %2) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx512.ktestc.q(<64 x i1>, <64 x i1>) nounwind readnone + +define i32 @test_x86_avx512_ktestz_q(<64 x i8> %A, <64 x i8> %B) #0 { +; CHECK-LABEL: @test_x86_avx512_ktestz_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <64 x i8> [[A:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = or <64 x i8> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <64 x i8> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <64 x i8> [[TMP4]], splat (i8 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <64 x i8> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <64 x i8> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <64 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <64 x i8> [[A]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <64 x i8> [[B:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <64 x i8> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP11]], splat (i8 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <64 x i8> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <64 x i8> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <64 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <64 x i8> [[B]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <64 x i1> [[_MSPROP_ICMP]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP17]], 0 +; CHECK-NEXT: 
[[TMP18:%.*]] = bitcast <64 x i1> [[_MSPROP_ICMP1]] to i64 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP18]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] +; CHECK: 19: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 20: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> [[TMP9]], <64 x i1> [[TMP16]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %1 = icmp ne <64 x i8> %A, zeroinitializer + %2 = icmp ne <64 x i8> %B, zeroinitializer + %res = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> %1, <64 x i1> %2) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx512.ktestz.q(<64 x i1>, <64 x i1>) nounwind readnone + +define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> 
[[TMP7]] +; + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 +} + +define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 
x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 +} + +define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> 
[[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP15]] +; + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label 
[[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %b = load <16 x i32>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 +} + +define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to 
i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP21]] +; + %b = load <16 x i32>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 +} + +define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call 
void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP20]] +; + %b = load <16 x i32>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rmb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], 
[[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 +} + +define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rmbk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr 
[[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP21]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> 
%vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 +} + +define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packs_epi32_rmbkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: 
[[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP20]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) + +define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 
ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP7]] +; + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + ret <64 x i8> %1 +} + +define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 
[[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP16]] +; + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru + ret <64 x i8> %3 +} + +define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x 
i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP15]] +; + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer + ret <64 x i8> %3 +} + +define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] 
+; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + ret <64 x i8> %1 +} + +define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: 
call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; 
CHECK-NEXT: ret <64 x i8> [[TMP21]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru + ret <64 x i8> %3 +} + +define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 %mask) #0 { +; CHECK-LABEL: @test_mask_packs_epi16_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x 
i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP20]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer + ret <64 x i8> %3 +} + +declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) + + +define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP7]] +; + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 +} + +define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> 
[[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 +} + +define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to 
<32 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP15]] +; + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to 
i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %b = load <16 x i32>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 +} + +define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; 
CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP21]] +; + %b = load <16 x i32>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 +} + +define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x 
i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP20]] +; + %b = load <16 x i32>, ptr %ptr_b + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rmb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> 
zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + ret <32 x i16> %1 +} + +define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rmbk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: 
unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <32 
x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP21]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru + ret <32 x i16> %3 +} + +define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) #0 { +; CHECK-LABEL: @test_mask_packus_epi32_rmbkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; 
CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP20]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +declare <32 x i16> 
@llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) + +define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) #0 { +; CHECK-LABEL: @test_mask_packus_epi16_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP7]] +; + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + ret <64 x i8> %1 +} + +define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) #0 { +; CHECK-LABEL: @test_mask_packus_epi16_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP16]] +; + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru + ret <64 x i8> %3 +} + +define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) #0 { +; CHECK-LABEL: @test_mask_packus_epi16_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 
8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP15]] +; + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer + ret <64 x i8> %3 +} + +define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) #0 { +; CHECK-LABEL: @test_mask_packus_epi16_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr 
(i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + ret <64 x i8> %1 +} + +define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) #0 { +; CHECK-LABEL: @test_mask_packus_epi16_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: 
[[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; 
CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP21]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru + ret <64 x i8> %3 +} + +define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 %mask) #0 { +; CHECK-LABEL: @test_mask_packus_epi16_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <32 x i16>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 
[[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP20]] +; + %b = load <32 x i16>, ptr %ptr_b + %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) + %2 = bitcast i64 %mask to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer + ret <64 x i8> %3 +} + +declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) + +define <32 x i16>@test_int_x86_avx512_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = 
load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i16 [[TMP4]], 31 +; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = and i16 [[TMP7]], 31 +; CHECK-NEXT: [[TMP9:%.*]] = or i16 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 31 +; CHECK-NEXT: [[TMP12:%.*]] = or i16 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP14:%.*]] = and i16 [[TMP13]], 31 +; CHECK-NEXT: [[TMP15:%.*]] = or i16 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 31 +; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP20:%.*]] = and i16 [[TMP19]], 31 +; CHECK-NEXT: [[TMP21:%.*]] = or i16 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 31 +; CHECK-NEXT: [[TMP24:%.*]] = or i16 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP26:%.*]] = and i16 [[TMP25]], 31 +; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 31 +; CHECK-NEXT: [[TMP30:%.*]] = or i16 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP32:%.*]] = and i16 [[TMP31]], 31 +; CHECK-NEXT: [[TMP33:%.*]] = or i16 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i16> [[TMP3]], i64 
10 +; CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 31 +; CHECK-NEXT: [[TMP36:%.*]] = or i16 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP38:%.*]] = and i16 [[TMP37]], 31 +; CHECK-NEXT: [[TMP39:%.*]] = or i16 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 31 +; CHECK-NEXT: [[TMP42:%.*]] = or i16 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP44:%.*]] = and i16 [[TMP43]], 31 +; CHECK-NEXT: [[TMP45:%.*]] = or i16 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP47:%.*]] = and i16 [[TMP46]], 31 +; CHECK-NEXT: [[TMP48:%.*]] = or i16 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP50:%.*]] = and i16 [[TMP49]], 31 +; CHECK-NEXT: [[TMP51:%.*]] = or i16 [[TMP49]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16 +; CHECK-NEXT: [[TMP53:%.*]] = and i16 [[TMP52]], 31 +; CHECK-NEXT: [[TMP54:%.*]] = or i16 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17 +; CHECK-NEXT: [[TMP56:%.*]] = and i16 [[TMP55]], 31 +; CHECK-NEXT: [[TMP57:%.*]] = or i16 [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18 +; CHECK-NEXT: [[TMP59:%.*]] = and i16 [[TMP58]], 31 +; CHECK-NEXT: [[TMP60:%.*]] = or i16 [[TMP58]], [[TMP59]] +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19 +; CHECK-NEXT: [[TMP62:%.*]] = and i16 [[TMP61]], 31 +; CHECK-NEXT: [[TMP63:%.*]] = or i16 [[TMP61]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20 +; CHECK-NEXT: [[TMP65:%.*]] = and i16 [[TMP64]], 31 +; CHECK-NEXT: [[TMP66:%.*]] = or i16 [[TMP64]], [[TMP65]] +; CHECK-NEXT: 
[[TMP67:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21 +; CHECK-NEXT: [[TMP68:%.*]] = and i16 [[TMP67]], 31 +; CHECK-NEXT: [[TMP69:%.*]] = or i16 [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22 +; CHECK-NEXT: [[TMP71:%.*]] = and i16 [[TMP70]], 31 +; CHECK-NEXT: [[TMP72:%.*]] = or i16 [[TMP70]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23 +; CHECK-NEXT: [[TMP74:%.*]] = and i16 [[TMP73]], 31 +; CHECK-NEXT: [[TMP75:%.*]] = or i16 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24 +; CHECK-NEXT: [[TMP77:%.*]] = and i16 [[TMP76]], 31 +; CHECK-NEXT: [[TMP78:%.*]] = or i16 [[TMP76]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25 +; CHECK-NEXT: [[TMP80:%.*]] = and i16 [[TMP79]], 31 +; CHECK-NEXT: [[TMP81:%.*]] = or i16 [[TMP79]], [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26 +; CHECK-NEXT: [[TMP83:%.*]] = and i16 [[TMP82]], 31 +; CHECK-NEXT: [[TMP84:%.*]] = or i16 [[TMP82]], [[TMP83]] +; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27 +; CHECK-NEXT: [[TMP86:%.*]] = and i16 [[TMP85]], 31 +; CHECK-NEXT: [[TMP87:%.*]] = or i16 [[TMP85]], [[TMP86]] +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28 +; CHECK-NEXT: [[TMP89:%.*]] = and i16 [[TMP88]], 31 +; CHECK-NEXT: [[TMP90:%.*]] = or i16 [[TMP88]], [[TMP89]] +; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29 +; CHECK-NEXT: [[TMP92:%.*]] = and i16 [[TMP91]], 31 +; CHECK-NEXT: [[TMP93:%.*]] = or i16 [[TMP91]], [[TMP92]] +; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30 +; CHECK-NEXT: [[TMP95:%.*]] = and i16 [[TMP94]], 31 +; CHECK-NEXT: [[TMP96:%.*]] = or i16 [[TMP94]], [[TMP95]] +; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31 +; CHECK-NEXT: [[TMP98:%.*]] = and i16 [[TMP97]], 31 +; CHECK-NEXT: 
[[TMP99:%.*]] = or i16 [[TMP97]], [[TMP98]] +; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP99]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP101:%.*]], label [[TMP102:%.*]], !prof [[PROF1]] +; CHECK: 101: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 102: +; CHECK-NEXT: [[TMP103:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]]) +; CHECK-NEXT: store <32 x i16> [[TMP100]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP103]] +; + %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2) + ret <32 x i16> %1 +} + +define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31 +; CHECK-NEXT: [[TMP7:%.*]] = or i16 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31 +; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> 
[[TMP3]], i64 2 +; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31 +; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31 +; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31 +; CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31 +; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31 +; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31 +; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31 +; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31 +; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31 +; CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31 +; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]] +; CHECK-NEXT: 
[[TMP44:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31 +; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31 +; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31 +; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16 +; CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31 +; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17 +; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31 +; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18 +; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31 +; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19 +; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31 +; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20 +; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31 +; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21 +; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31 +; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]] +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22 +; CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31 +; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23 +; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31 +; CHECK-NEXT: 
[[TMP76:%.*]] = or i16 [[TMP74]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24 +; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31 +; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]] +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25 +; CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31 +; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26 +; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31 +; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]] +; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27 +; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31 +; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28 +; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31 +; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]] +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29 +; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31 +; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30 +; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31 +; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]] +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31 +; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31 +; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]] +; CHECK: 102: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 103: +; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> 
@llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]]) +; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1> +; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP107:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> [[TMP101]], <32 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP108:%.*]] = xor <32 x i16> [[TMP104]], [[X1]] +; CHECK-NEXT: [[TMP109:%.*]] = or <32 x i16> [[TMP108]], [[TMP101]] +; CHECK-NEXT: [[TMP110:%.*]] = or <32 x i16> [[TMP109]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP105]], <32 x i16> [[TMP110]], <32 x i16> [[TMP107]] +; CHECK-NEXT: [[TMP111:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> [[TMP104]], <32 x i16> [[X1]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP111]] +; + %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1 + ret <32 x i16> %3 +} + +define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31 +; CHECK-NEXT: [[TMP7:%.*]] = or i16 
[[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31 +; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31 +; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31 +; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31 +; CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31 +; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31 +; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31 +; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31 +; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31 +; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31 +; 
CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31 +; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31 +; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31 +; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31 +; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16 +; CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31 +; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17 +; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31 +; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18 +; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31 +; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19 +; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31 +; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20 +; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31 +; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21 +; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31 +; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]] +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22 +; 
CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31 +; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23 +; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31 +; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP74]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24 +; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31 +; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]] +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25 +; CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31 +; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26 +; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31 +; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]] +; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27 +; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31 +; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28 +; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31 +; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]] +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29 +; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31 +; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30 +; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31 +; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]] +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31 +; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31 +; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0 +; 
CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]] +; CHECK: 102: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 103: +; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]]) +; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1> +; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP107:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> [[TMP101]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP108:%.*]] = xor <32 x i16> [[TMP104]], zeroinitializer +; CHECK-NEXT: [[TMP109:%.*]] = or <32 x i16> [[TMP108]], [[TMP101]] +; CHECK-NEXT: [[TMP110:%.*]] = or <32 x i16> [[TMP109]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP105]], <32 x i16> [[TMP110]], <32 x i16> [[TMP107]] +; CHECK-NEXT: [[TMP111:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> [[TMP104]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP111]] +; + %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>) + +define <32 x i16>@test_int_x86_avx512_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to 
i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i16 [[TMP4]], 31 +; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = and i16 [[TMP7]], 31 +; CHECK-NEXT: [[TMP9:%.*]] = or i16 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 31 +; CHECK-NEXT: [[TMP12:%.*]] = or i16 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP14:%.*]] = and i16 [[TMP13]], 31 +; CHECK-NEXT: [[TMP15:%.*]] = or i16 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 31 +; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP20:%.*]] = and i16 [[TMP19]], 31 +; CHECK-NEXT: [[TMP21:%.*]] = or i16 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 31 +; CHECK-NEXT: [[TMP24:%.*]] = or i16 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP26:%.*]] = and i16 [[TMP25]], 31 +; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 31 +; CHECK-NEXT: [[TMP30:%.*]] = or i16 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP32:%.*]] = and i16 [[TMP31]], 31 +; CHECK-NEXT: [[TMP33:%.*]] = or i16 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10 +; 
CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 31 +; CHECK-NEXT: [[TMP36:%.*]] = or i16 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP38:%.*]] = and i16 [[TMP37]], 31 +; CHECK-NEXT: [[TMP39:%.*]] = or i16 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 31 +; CHECK-NEXT: [[TMP42:%.*]] = or i16 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP44:%.*]] = and i16 [[TMP43]], 31 +; CHECK-NEXT: [[TMP45:%.*]] = or i16 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP47:%.*]] = and i16 [[TMP46]], 31 +; CHECK-NEXT: [[TMP48:%.*]] = or i16 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP50:%.*]] = and i16 [[TMP49]], 31 +; CHECK-NEXT: [[TMP51:%.*]] = or i16 [[TMP49]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16 +; CHECK-NEXT: [[TMP53:%.*]] = and i16 [[TMP52]], 31 +; CHECK-NEXT: [[TMP54:%.*]] = or i16 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17 +; CHECK-NEXT: [[TMP56:%.*]] = and i16 [[TMP55]], 31 +; CHECK-NEXT: [[TMP57:%.*]] = or i16 [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18 +; CHECK-NEXT: [[TMP59:%.*]] = and i16 [[TMP58]], 31 +; CHECK-NEXT: [[TMP60:%.*]] = or i16 [[TMP58]], [[TMP59]] +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19 +; CHECK-NEXT: [[TMP62:%.*]] = and i16 [[TMP61]], 31 +; CHECK-NEXT: [[TMP63:%.*]] = or i16 [[TMP61]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20 +; CHECK-NEXT: [[TMP65:%.*]] = and i16 [[TMP64]], 31 +; CHECK-NEXT: [[TMP66:%.*]] = or i16 [[TMP64]], [[TMP65]] +; CHECK-NEXT: [[TMP67:%.*]] = 
extractelement <32 x i16> [[TMP3]], i64 21 +; CHECK-NEXT: [[TMP68:%.*]] = and i16 [[TMP67]], 31 +; CHECK-NEXT: [[TMP69:%.*]] = or i16 [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22 +; CHECK-NEXT: [[TMP71:%.*]] = and i16 [[TMP70]], 31 +; CHECK-NEXT: [[TMP72:%.*]] = or i16 [[TMP70]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23 +; CHECK-NEXT: [[TMP74:%.*]] = and i16 [[TMP73]], 31 +; CHECK-NEXT: [[TMP75:%.*]] = or i16 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24 +; CHECK-NEXT: [[TMP77:%.*]] = and i16 [[TMP76]], 31 +; CHECK-NEXT: [[TMP78:%.*]] = or i16 [[TMP76]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25 +; CHECK-NEXT: [[TMP80:%.*]] = and i16 [[TMP79]], 31 +; CHECK-NEXT: [[TMP81:%.*]] = or i16 [[TMP79]], [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26 +; CHECK-NEXT: [[TMP83:%.*]] = and i16 [[TMP82]], 31 +; CHECK-NEXT: [[TMP84:%.*]] = or i16 [[TMP82]], [[TMP83]] +; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27 +; CHECK-NEXT: [[TMP86:%.*]] = and i16 [[TMP85]], 31 +; CHECK-NEXT: [[TMP87:%.*]] = or i16 [[TMP85]], [[TMP86]] +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28 +; CHECK-NEXT: [[TMP89:%.*]] = and i16 [[TMP88]], 31 +; CHECK-NEXT: [[TMP90:%.*]] = or i16 [[TMP88]], [[TMP89]] +; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29 +; CHECK-NEXT: [[TMP92:%.*]] = and i16 [[TMP91]], 31 +; CHECK-NEXT: [[TMP93:%.*]] = or i16 [[TMP91]], [[TMP92]] +; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30 +; CHECK-NEXT: [[TMP95:%.*]] = and i16 [[TMP94]], 31 +; CHECK-NEXT: [[TMP96:%.*]] = or i16 [[TMP94]], [[TMP95]] +; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31 +; CHECK-NEXT: [[TMP98:%.*]] = and i16 [[TMP97]], 31 +; CHECK-NEXT: [[TMP99:%.*]] = or i16 
[[TMP97]], [[TMP98]] +; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1:%.*]], <32 x i16> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP99]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP101:%.*]], label [[TMP102:%.*]], !prof [[PROF1]] +; CHECK: 101: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 102: +; CHECK-NEXT: [[TMP103:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1]], <32 x i16> [[X2:%.*]]) +; CHECK-NEXT: store <32 x i16> [[TMP100]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP103]] +; + %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) + ret <32 x i16> %1 +} + +define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31 +; CHECK-NEXT: [[TMP7:%.*]] = or i16 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31 +; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2 +; 
CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31 +; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31 +; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31 +; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31 +; CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31 +; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31 +; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31 +; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31 +; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31 +; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31 +; CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31 +; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = 
extractelement <32 x i16> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31 +; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31 +; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31 +; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16 +; CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31 +; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17 +; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31 +; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18 +; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31 +; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19 +; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31 +; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20 +; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31 +; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21 +; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31 +; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]] +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22 +; CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31 +; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23 +; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31 +; CHECK-NEXT: [[TMP76:%.*]] = or i16 
[[TMP74]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24 +; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31 +; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]] +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25 +; CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31 +; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26 +; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31 +; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]] +; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27 +; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31 +; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28 +; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31 +; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]] +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29 +; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31 +; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30 +; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31 +; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]] +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31 +; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31 +; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1:%.*]], <32 x i16> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]] +; CHECK: 102: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 103: +; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> 
@llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1]], <32 x i16> [[X2:%.*]]) +; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1> +; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP107:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> [[TMP101]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP108:%.*]] = xor <32 x i16> [[TMP104]], [[X1]] +; CHECK-NEXT: [[TMP109:%.*]] = or <32 x i16> [[TMP108]], [[TMP101]] +; CHECK-NEXT: [[TMP110:%.*]] = or <32 x i16> [[TMP109]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP105]], <32 x i16> [[TMP110]], <32 x i16> [[TMP107]] +; CHECK-NEXT: [[TMP111:%.*]] = select <32 x i1> [[TMP106]], <32 x i16> [[TMP104]], <32 x i16> [[X1]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP111]] +; + %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1 + ret <32 x i16> %3 +} + +declare <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8>, <64 x i8>) + +define <64 x i8> @test_int_x86_avx512_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pavg_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %1 = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1) + ret <64 x i8> %1 +} + +define 
<64 x i8> @test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <64 x i8> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i8> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP11]], <64 x i8> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[TMP5]], <64 x i8> [[X2]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP12]] +; + %1 = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1) + %2 = bitcast i64 %x3 to <64 x i1> + %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x2 + ret <64 x i8> %3 +} + +declare <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16>, <32 x i16>) + +define <32 x i16> @test_int_x86_avx512_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) #0 { 
+; CHECK-LABEL: @test_int_x86_avx512_pavg_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %1 = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1) + ret <32 x i16> %1 +} + +define <32 x i16> @test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: 
[[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %1 = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 + ret <32 x i16> %3 +} + +declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>) + +define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pshuf_b_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[RES]] +; + %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) + ret <64 x i8> %res +} + +define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pshuf_b_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: 
[[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <64 x i1> [[MASK_CAST]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <64 x i8> [[RES]], [[X2:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = or <64 x i8> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP9]], <64 x i8> [[TMP6]] +; CHECK-NEXT: [[RES2:%.*]] = select <64 x i1> [[MASK_CAST]], <64 x i8> [[RES]], <64 x i8> [[X2]] +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[RES2]] +; + %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) + %mask.cast = bitcast i64 %mask to <64 x i1> + %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2 + ret <64 x i8> %res2 +} + +define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pshuf_b_512_maskz( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> 
@llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[MASK_CAST]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <64 x i8> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = or <64 x i8> [[TMP6]], [[_MSPROP]] +; CHECK-NEXT: [[TMP8:%.*]] = or <64 x i8> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> [[TMP8]], <64 x i8> [[TMP5]] +; CHECK-NEXT: [[RES2:%.*]] = select <64 x i1> [[MASK_CAST]], <64 x i8> [[RES]], <64 x i8> zeroinitializer +; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[RES2]] +; + %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) + %mask.cast = bitcast i64 %mask to <64 x i1> + %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer + ret <64 x i8> %res2 +} + +declare <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16>, <32 x i16>) + +define <32 x i16> @test_int_x86_avx512_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmulhu_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1) + ret <32 x 
i16> %1 +} + +define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 + ret <32 x i16> %3 +} + +declare <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16>, <32 x i16>) + +define <32 x i16> 
@test_int_x86_avx512_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmulh_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1) + ret <32 x i16> %1 +} + +define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], 
[[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 + ret <32 x i16> %3 +} + +declare <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16>, <32 x i16>) + +define <32 x i16> @test_int_x86_avx512_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmulhr_sw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1) + ret <32 x i16> %1 +} + +define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; 
CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 + ret <32 x i16> %3 +} + +define <32 x i8>@test_int_x86_avx512_pmov_wb_512(<32 x i16> %x0) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmov_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i8> +; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[TMP2]] +; + %1 = trunc <32 x i16> 
%x0 to <32 x i8> + ret <32 x i8> %1 +} + +define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[_MSPROP]], <32 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i8> [[TMP4]], [[X1:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i8> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i8> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP10]], <32 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP4]], <32 x i8> [[X1]] +; CHECK-NEXT: store <32 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[TMP11]] +; + %1 = trunc <32 x i16> %x0 to <32 x i8> + %2 = bitcast i32 %x2 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x1 + ret <32 x i8> %3 +} + +define <32 x i8>@test_int_x86_avx512_maskz_pmov_wb_512(<32 x i16> %x0, i32 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pmov_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; 
CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[_MSPROP]], <32 x i8> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i8> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> [[TMP9]], <32 x i8> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP3]], <32 x i8> zeroinitializer +; CHECK-NEXT: store <32 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[TMP10]] +; + %1 = trunc <32 x i16> %x0 to <32 x i8> + %2 = bitcast i32 %x2 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer + ret <32 x i8> %3 +} + +declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(ptr %ptr, <32 x i16>, i32) + +define void @test_int_x86_avx512_mask_pmov_wb_mem_512(ptr %ptr, <32 x i16> %x1, i32 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_wb_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; 
CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.wb.mem.512(ptr [[PTR:%.*]], <32 x i16> [[X1:%.*]], i32 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.wb.mem.512(ptr [[PTR]], <32 x i16> [[X1]], i32 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmov.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 -1) + call void @llvm.x86.avx512.mask.pmov.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 %x2) + ret void +} + +declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32) + +define <32 x i8>@test_int_x86_avx512_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmovs_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; 
CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> [[X1:%.*]], i32 -1) +; CHECK-NEXT: store <32 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> [[X1:%.*]], i32 [[X2:%.*]]) +; CHECK-NEXT: store <32 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 
x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) + ret <32 x i8> %res +} + +define <32 x i8>@test_int_x86_avx512_maskz_pmovs_wb_512(<32 x i16> %x0, i32 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovs_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> zeroinitializer, i32 [[X2:%.*]]) +; CHECK-NEXT: store <32 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) + ret <32 x i8> %res +} + +declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(ptr %ptr, <32 x i16>, i32) + +define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(ptr %ptr, <32 x i16> %x1, i32 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_wb_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(ptr [[PTR:%.*]], <32 x i16> [[X1:%.*]], i32 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(ptr [[PTR]], <32 x i16> [[X1]], i32 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 -1) + call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 %x2) + ret void +} + +declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32) + +define <32 x i8>@test_int_x86_avx512_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmovus_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x 
i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> [[X1:%.*]], i32 -1) +; CHECK-NEXT: store <32 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; 
CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> [[X1:%.*]], i32 [[X2:%.*]]) +; CHECK-NEXT: store <32 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) + ret <32 x i8> %res +} + +define <32 x i8>@test_int_x86_avx512_maskz_pmovus_wb_512(<32 x i16> %x0, i32 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovus_wb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> zeroinitializer, i32 [[X2:%.*]]) +; CHECK-NEXT: store <32 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) + ret <32 x i8> %res +} + +declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(ptr %ptr, <32 x i16>, i32) + +define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(ptr %ptr, <32 x i16> %x1, i32 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_wb_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: 
[[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(ptr [[PTR:%.*]], <32 x i16> [[X1:%.*]], i32 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(ptr [[PTR]], <32 x i16> [[X1]], i32 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 -1) + call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(ptr %ptr, <32 x i16> %x1, i32 %x2) + ret void +} + +declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) + +define <32 x i16> @test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmaddubs_w_512( +; CHECK-NEXT: 
[[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP7]] +; + %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1) + ret <32 x i16> %1 +} + +define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaddubs_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <64 x i8> [[TMP2]] 
to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[X2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP16]] +; + %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 + ret <32 x i16> %3 +} + +declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>) + +define <16 x i32> @test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmaddw_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: 
[[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP7]] +; + %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1) + ret <16 x i32> %1 +} + +define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmaddw_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; 
CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[X2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 + ret <16 x i32> %3 +} + +declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32) + +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_dbpsadbw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 +; 
CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]], i32 2) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[X4:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[X3:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[X3]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP17]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP18]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] +; CHECK: 19: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 20: +; CHECK-NEXT: [[TMP21:%.*]] = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> [[X0]], <64 x i8> [[X1]], i32 3) +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32 [[X4]] to <32 x i1> +; 
CHECK-NEXT: [[TMP24:%.*]] = select <32 x i1> [[TMP23]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = xor <32 x i16> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = or <32 x i16> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = or <32 x i16> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP22]], <32 x i16> [[TMP27]], <32 x i16> [[TMP24]] +; CHECK-NEXT: [[TMP28:%.*]] = select <32 x i1> [[TMP23]], <32 x i16> [[TMP21]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP29]], 0 +; CHECK-NEXT: [[TMP30:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP30]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: br i1 [[_MSOR8]], label [[TMP31:%.*]], label [[TMP32:%.*]], !prof [[PROF1]] +; CHECK: 31: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 32: +; CHECK-NEXT: [[TMP33:%.*]] = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> [[X0]], <64 x i8> [[X1]], i32 4) +; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } { <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1), <32 x i16> splat (i16 -1) }, <32 x i16> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES1:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> [[TMP16]], 0 +; CHECK-NEXT: [[TMP35:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP34]], <32 x i16> [[_MSPROP_SELECT1]], 1 +; CHECK-NEXT: [[RES2:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES1]], <32 x i16> [[TMP28]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP35]], <32 x i16> zeroinitializer, 2 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } [[RES2]], <32 x i16> [[TMP33]], 2 +; CHECK-NEXT: 
store { <32 x i16>, <32 x i16>, <32 x i16> } [[TMP36]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <32 x i16>, <32 x i16>, <32 x i16> } [[RES3]] +; + %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2) + %2 = bitcast i32 %x4 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3 + %4 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 3) + %5 = bitcast i32 %x4 to <32 x i1> + %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer + %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4) + %res1 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0 + %res2 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res1, <32 x i16> %6, 1 + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res2, <32 x i16> %7, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res3 +} + +declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>) + +define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){ +; CHECK-LABEL: @test_int_x86_avx512_mask_psadb_w_512( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES0:%.*]] = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> [[X0]], <64 x i8> [[X2:%.*]]) +; CHECK-NEXT: [[RES2:%.*]] = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> [[RES0]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <8 x i64>, <8 x i64> } [[RES2]], <8 x i64> [[RES1]], 1 +; CHECK-NEXT: store { <8 x i64>, <8 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <8 x i64>, <8 x i64> } [[RES3]] +; + %res0 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) + %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2) + %res2 = insertvalue { 
<8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1 + ret { <8 x i64>, <8 x i64> } %res3 +} + +declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) nounwind readnone + +define <32 x i16> @test_x86_avx512_psrlv_w_512_const() optsize #0 { +; CHECK-LABEL: @test_x86_avx512_psrlv_w_512_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES1:%.*]] = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> , <32 x i16> ) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES1]] +; + %res1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> , <32 x i16> ) + ret <32 x i16> %res1 +} + +define <32 x i16>@test_int_x86_avx512_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_psrlv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1) + ret <32 x i16> %1 +} + +define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psrlv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 + ret <32 x i16> %3 +} + +define <32 x i16>@test_int_x86_avx512_maskz_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_psrlv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: 
[[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>) + +define <32 x i16>@test_int_x86_avx512_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_psrav32_hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %1 = call <32 x i16> 
@llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1) + ret <32 x i16> %1 +} + +define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psrav32_hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 + ret <32 x i16> %3 +} + +define <32 x 
i16>@test_int_x86_avx512_maskz_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_psrav32_hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psrav32_hi_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> 
@llvm.x86.avx512.psrav.w.512(<32 x i16> , <32 x i16> ) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> , <32 x i16> ) + ret <32 x i16> %1 +} + +define <32 x i16>@test_int_x86_avx512_psllv32hi(<32 x i16> %x0, <32 x i16> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_psllv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1) + ret <32 x i16> %1 +} + +define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_psllv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: 
[[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 + ret <32 x i16> %3 +} + +define <32 x i16>@test_int_x86_avx512_maskz_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_psllv32hi( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; 
CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>) + +define <32 x i16>@test_int_x86_avx512_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_permvar_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP3]] +; + %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1) + ret <32 x i16> %1 +} + +define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; 
CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[_MSPROP]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i16> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP11]], <32 x i16> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP5]], <32 x i16> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP12]] +; + %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 + ret <32 x i16> %3 +} + +define <32 x i16>@test_int_x86_avx512_maskz_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_hi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; 
CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[_MSPROP]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP11]] +; + %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer + ret <32 x i16> %3 +} + +define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psll_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 
x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES]] +; + %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] + ret <32 x i16> %res +} +define <32 x i16> @test_x86_avx512_mask_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psll_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[TMP11]], <32 x i16> 
[[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <32 x i16> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP16]], <32 x i16> [[TMP13]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru + ret <32 x i16> %res2 +} +define <32 x i16> @test_x86_avx512_maskz_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psll_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 
[[TMP3]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer + ret <32 x i16> %res2 +} +declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind readnone + + +define <32 x i16> @test_x86_avx512_psllv_w_512_const() optsize #0 { +; CHECK-LABEL: @test_x86_avx512_psllv_w_512_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES1:%.*]] = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> , <32 x i16> ) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES1]] +; + %res1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> , <32 x i16> ) + ret <32 x i16> %res1 +} +declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) nounwind readnone + +define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_pslli_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> 
@llvm.x86.avx512.pslli.w.512(<32 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <32 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES]] +; + %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] + ret <32 x i16> %res +} +define <32 x i16> @test_x86_avx512_mask_pslli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_pslli_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[TMP5]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> 
[[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru + ret <32 x i16> %res2 +} +define <32 x i16> @test_x86_avx512_maskz_pslli_w_512(<32 x i16> %a0, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_pslli_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <32 x i16> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP9]], <32 x i16> [[TMP6]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, <32 x 
i16> %res, <32 x i16> zeroinitializer + ret <32 x i16> %res2 +} +declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) nounwind readnone + + +define <32 x i16> @test_x86_avx512_psra_w_512(<32 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psra_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES]] +; + %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] + ret <32 x i16> %res +} +define <32 x i16> @test_x86_avx512_mask_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psra_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <32 x i16> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP16]], <32 x i16> [[TMP13]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru + ret <32 x i16> %res2 +} +define <32 x i16> @test_x86_avx512_maskz_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psra_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load 
<8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer + ret <32 x i16> %res2 +} +declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x 
i16>, <8 x i16>) nounwind readnone + + +define <32 x i16> @test_x86_avx512_psrai_w_512(<32 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_psrai_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <32 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES]] +; + %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] + ret <32 x i16> %res +} +define <32 x i16> @test_x86_avx512_mask_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrai_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[TMP5]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP5]] 
+; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru + ret <32 x i16> %res2 +} +define <32 x i16> @test_x86_avx512_maskz_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrai_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <32 x i16> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP9]], <32 x i16> [[TMP6]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> 
zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer + ret <32 x i16> %res2 +} +declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) nounwind readnone + + +define <32 x i16> @test_x86_avx512_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrl_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES]] +; + %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] + ret <32 x i16> %res +} +define <32 x i16> @test_x86_avx512_mask_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrl_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load 
<8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <32 x i16> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i16> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> [[TMP16]], <32 x i16> [[TMP13]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, 
<32 x i16> %res, <32 x i16> %passthru + ret <32 x i16> %res2 +} +define <32 x i16> @test_x86_avx512_maskz_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrl_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> 
[[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer + ret <32 x i16> %res2 +} +declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) nounwind readnone + +define <32 x i16> @test_x86_avx512_psrl_w_512_load(<32 x i16> %a0, ptr %p) #0 { +; CHECK-LABEL: @test_x86_avx512_psrl_w_512_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[TMP9:%.*]] = trunc i128 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = sext i1 [[TMP10]] to i512 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i512 [[TMP11]] to <32 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[TMP2]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <32 x i16> [[TMP14]], ptr @__msan_retval_tls, align 
8 +; CHECK-NEXT: ret <32 x i16> [[RES]] +; + %a1 = load <8 x i16>, ptr %p + %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] + ret <32 x i16> %res +} + +define <32 x i16> @test_x86_avx512_psrli_w_512(<32 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_psrli_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <32 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES]] +; + %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] + ret <32 x i16> %res +} +define <32 x i16> @test_x86_avx512_mask_psrli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrli_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x 
i16> [[TMP5]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru + ret <32 x i16> %res2 +} +define <32 x i16> @test_x86_avx512_maskz_psrli_w_512(<32 x i16> %a0, i32 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrli_w_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <32 x i16> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[TMP4]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x 
i1> [[TMP5]], <32 x i16> [[TMP9]], <32 x i16> [[TMP6]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK_CAST]], <32 x i16> [[RES]], <32 x i16> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[RES2]] +; + %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] + %mask.cast = bitcast i32 %mask to <32 x i1> + %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer + ret <32 x i16> %res2 +} +declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) nounwind readnone + +attributes #0 = { sanitize_memory } From acb30465f1b267d3cf15043bb3aaff9ea4b79902 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 8 Jul 2025 17:04:35 +0000 Subject: [PATCH 2/2] undef -> poison --- .../X86/avx512bw-intrinsics-upgrade.ll | 48 +++++++++---------- .../X86/avx512bw-intrinsics.ll | 48 +++++++++---------- 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll index c8e7db81ec753..db4ad6b8fc28b 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll @@ -2264,9 +2264,9 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) noun ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer 
-; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -2282,8 +2282,8 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) noun ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -2307,9 +2307,9 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer -; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 ; 
CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -2333,8 +2333,8 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: ret <32 x i16> [[TMP21]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -2357,9 +2357,9 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer -; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -2383,8 +2383,8 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: ret <32 x i16> [[TMP20]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> 
undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } @@ -2835,9 +2835,9 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) nou ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer -; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -2853,8 +2853,8 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) nou ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -2878,9 +2878,9 
@@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <3 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer -; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -2904,8 +2904,8 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <3 ; CHECK-NEXT: ret <32 x i16> [[TMP21]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -2928,9 +2928,9 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x 
i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer -; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -2954,8 +2954,8 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i ; CHECK-NEXT: ret <32 x i16> [[TMP20]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll index 6f2a6ccbfa702..3f09e54a0224d 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll @@ -531,9 +531,9 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> 
undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer -; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -549,8 +549,8 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) ret <32 x i16> %1 } @@ -574,9 +574,9 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer -; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> 
[[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -600,8 +600,8 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: ret <32 x i16> [[TMP21]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru @@ -626,9 +626,9 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer -; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -652,8 +652,8 @@ define <32 x i16> 
@test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: ret <32 x i16> [[TMP20]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer @@ -1122,9 +1122,9 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer -; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -1140,8 +1140,8 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x 
i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) ret <32 x i16> %1 } @@ -1165,9 +1165,9 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <3 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer -; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -1191,8 +1191,8 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <3 ; CHECK-NEXT: ret <32 x i16> [[TMP21]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru @@ -1217,9 +1217,9 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, 
i ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 -; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer -; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 @@ -1243,8 +1243,8 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i ; CHECK-NEXT: ret <32 x i16> [[TMP20]] ; %q = load i32, ptr %ptr_b - %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 - %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer