diff --git a/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll new file mode 100644 index 0000000000000..70f4d6f237a41 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll @@ -0,0 +1,1818 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_addsub_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_addsub_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { +; CHECK-LABEL: @test_x86_avx_blendv_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 64) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i64> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]], <4 x double> [[A2:%.*]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { +; CHECK-LABEL: @test_x86_avx_blendv_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 64) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], <8 x float> [[A2:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_cmp_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_cmp_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} + +define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_cmp_ps_256_pseudo_op( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[A2:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 0) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[A3:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A2]], i8 1) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP12]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[A4:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A3]], i8 2) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF0]] +; CHECK: 16: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 17: +; CHECK-NEXT: [[A5:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A4]], i8 3) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP18]], 0 +; CHECK-NEXT: br i1 [[_MSCMP5]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF0]] +; CHECK: 19: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 20: +; CHECK-NEXT: [[A6:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A5]], i8 4) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i256 [[TMP21]], 0 +; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF0]] +; CHECK: 22: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 23: +; CHECK-NEXT: [[A7:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A6]], i8 5) +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i256 [[TMP24]], 0 +; CHECK-NEXT: br i1 [[_MSCMP7]], label [[TMP25:%.*]], label [[TMP26:%.*]], !prof [[PROF0]] +; CHECK: 25: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 26: +; CHECK-NEXT: [[A8:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A7]], i8 6) +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i256 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF0]] +; CHECK: 28: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 29: +; CHECK-NEXT: [[A9:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A8]], i8 7) +; CHECK-NEXT: [[TMP30:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i256 [[TMP30]], 0 +; CHECK-NEXT: br i1 [[_MSCMP9]], label [[TMP31:%.*]], label [[TMP32:%.*]], !prof [[PROF0]] +; CHECK: 31: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 32: +; CHECK-NEXT: [[A10:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A9]], i8 8) +; CHECK-NEXT: [[TMP33:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP10:%.*]] = icmp ne i256 [[TMP33]], 0 +; CHECK-NEXT: br i1 [[_MSCMP10]], label [[TMP34:%.*]], label [[TMP35:%.*]], !prof [[PROF0]] +; CHECK: 34: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 35: +; CHECK-NEXT: [[A11:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A10]], i8 9) +; CHECK-NEXT: [[TMP36:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i256 [[TMP36]], 0 +; CHECK-NEXT: br i1 [[_MSCMP11]], label [[TMP37:%.*]], label [[TMP38:%.*]], !prof [[PROF0]] +; CHECK: 37: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 38: +; CHECK-NEXT: [[A12:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A11]], i8 10) +; CHECK-NEXT: [[TMP39:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i256 [[TMP39]], 0 +; CHECK-NEXT: br i1 [[_MSCMP12]], label [[TMP40:%.*]], label [[TMP41:%.*]], !prof [[PROF0]] +; CHECK: 40: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 41: +; CHECK-NEXT: [[A13:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A12]], i8 11) +; CHECK-NEXT: [[TMP42:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i256 [[TMP42]], 0 +; CHECK-NEXT: br i1 [[_MSCMP13]], label [[TMP43:%.*]], label [[TMP44:%.*]], !prof [[PROF0]] +; CHECK: 43: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 44: +; CHECK-NEXT: [[A14:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A13]], i8 12) +; CHECK-NEXT: [[TMP45:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i256 [[TMP45]], 0 +; CHECK-NEXT: br i1 [[_MSCMP14]], label [[TMP46:%.*]], label [[TMP47:%.*]], !prof [[PROF0]] +; CHECK: 46: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 47: +; CHECK-NEXT: [[A15:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A14]], i8 13) +; CHECK-NEXT: [[TMP48:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP15:%.*]] = icmp ne i256 [[TMP48]], 0 +; CHECK-NEXT: br i1 [[_MSCMP15]], label [[TMP49:%.*]], label [[TMP50:%.*]], !prof [[PROF0]] +; CHECK: 49: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 50: +; CHECK-NEXT: [[A16:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A15]], i8 14) +; CHECK-NEXT: [[TMP51:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i256 [[TMP51]], 0 +; CHECK-NEXT: br i1 [[_MSCMP16]], label [[TMP52:%.*]], label [[TMP53:%.*]], !prof [[PROF0]] +; CHECK: 52: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 53: +; CHECK-NEXT: [[A17:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A16]], i8 15) +; CHECK-NEXT: [[TMP54:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i256 [[TMP54]], 0 +; CHECK-NEXT: br i1 [[_MSCMP17]], label [[TMP55:%.*]], label [[TMP56:%.*]], !prof [[PROF0]] +; CHECK: 55: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 56: +; CHECK-NEXT: [[A18:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A17]], i8 16) +; CHECK-NEXT: [[TMP57:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i256 [[TMP57]], 0 +; CHECK-NEXT: br i1 [[_MSCMP18]], label [[TMP58:%.*]], label [[TMP59:%.*]], !prof [[PROF0]] +; CHECK: 58: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 59: +; CHECK-NEXT: [[A19:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A18]], i8 17) +; CHECK-NEXT: [[TMP60:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i256 [[TMP60]], 0 +; CHECK-NEXT: br i1 [[_MSCMP19]], label [[TMP61:%.*]], label [[TMP62:%.*]], !prof [[PROF0]] +; CHECK: 61: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 62: +; CHECK-NEXT: [[A20:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A19]], i8 18) +; CHECK-NEXT: [[TMP63:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i256 [[TMP63]], 0 +; CHECK-NEXT: br i1 [[_MSCMP20]], label [[TMP64:%.*]], label [[TMP65:%.*]], !prof [[PROF0]] +; CHECK: 64: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 65: +; CHECK-NEXT: [[A21:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A20]], i8 19) +; CHECK-NEXT: [[TMP66:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP21:%.*]] = icmp ne i256 [[TMP66]], 0 +; CHECK-NEXT: br i1 [[_MSCMP21]], label [[TMP67:%.*]], label [[TMP68:%.*]], !prof [[PROF0]] +; CHECK: 67: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 68: +; CHECK-NEXT: [[A22:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A21]], i8 20) +; CHECK-NEXT: [[TMP69:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i256 [[TMP69]], 0 +; CHECK-NEXT: br i1 [[_MSCMP22]], label [[TMP70:%.*]], label [[TMP71:%.*]], !prof [[PROF0]] +; CHECK: 70: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 71: +; CHECK-NEXT: [[A23:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A22]], i8 21) +; CHECK-NEXT: [[TMP72:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i256 [[TMP72]], 0 +; CHECK-NEXT: br i1 [[_MSCMP23]], label [[TMP73:%.*]], label [[TMP74:%.*]], !prof [[PROF0]] +; CHECK: 73: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 74: +; CHECK-NEXT: [[A24:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A23]], i8 22) +; CHECK-NEXT: [[TMP75:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP24:%.*]] = icmp ne i256 [[TMP75]], 0 +; CHECK-NEXT: br i1 [[_MSCMP24]], label [[TMP76:%.*]], label [[TMP77:%.*]], !prof [[PROF0]] +; CHECK: 76: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 77: +; CHECK-NEXT: [[A25:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A24]], i8 23) +; CHECK-NEXT: [[TMP78:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i256 [[TMP78]], 0 +; CHECK-NEXT: br i1 [[_MSCMP25]], label [[TMP79:%.*]], label [[TMP80:%.*]], !prof [[PROF0]] +; CHECK: 79: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 80: +; CHECK-NEXT: [[A26:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A25]], i8 24) +; CHECK-NEXT: [[TMP81:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i256 [[TMP81]], 0 +; CHECK-NEXT: br i1 [[_MSCMP26]], label [[TMP82:%.*]], label [[TMP83:%.*]], !prof [[PROF0]] +; CHECK: 82: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 83: +; CHECK-NEXT: [[A27:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A26]], i8 25) +; CHECK-NEXT: [[TMP84:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP27:%.*]] = icmp ne i256 [[TMP84]], 0 +; CHECK-NEXT: br i1 [[_MSCMP27]], label [[TMP85:%.*]], label [[TMP86:%.*]], !prof [[PROF0]] +; CHECK: 85: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 86: +; CHECK-NEXT: [[A28:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A27]], i8 26) +; CHECK-NEXT: [[TMP87:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i256 [[TMP87]], 0 +; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP88:%.*]], label [[TMP89:%.*]], !prof [[PROF0]] +; CHECK: 88: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 89: +; CHECK-NEXT: [[A29:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A28]], i8 27) +; CHECK-NEXT: [[TMP90:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP29:%.*]] = icmp ne i256 [[TMP90]], 0 +; CHECK-NEXT: br i1 [[_MSCMP29]], label [[TMP91:%.*]], label [[TMP92:%.*]], !prof [[PROF0]] +; CHECK: 91: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 92: +; CHECK-NEXT: [[A30:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A29]], i8 28) +; CHECK-NEXT: [[TMP93:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP30:%.*]] = icmp ne i256 [[TMP93]], 0 +; CHECK-NEXT: br i1 [[_MSCMP30]], label [[TMP94:%.*]], label [[TMP95:%.*]], !prof [[PROF0]] +; CHECK: 94: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 95: +; CHECK-NEXT: [[A31:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A30]], i8 29) +; CHECK-NEXT: [[TMP96:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP31:%.*]] = icmp ne i256 [[TMP96]], 0 +; CHECK-NEXT: br i1 [[_MSCMP31]], label [[TMP97:%.*]], label [[TMP98:%.*]], !prof [[PROF0]] +; CHECK: 97: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 98: +; CHECK-NEXT: [[A32:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A31]], i8 30) +; CHECK-NEXT: [[TMP99:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP32:%.*]] = icmp ne i256 [[TMP99]], 0 +; CHECK-NEXT: br i1 [[_MSCMP32]], label [[TMP100:%.*]], label [[TMP101:%.*]], !prof [[PROF0]] +; CHECK: 100: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 101: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A32]], i8 31) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1] + %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1] + %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1] + %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1] + %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1] + %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1] + %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1] + %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1] + %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1] + %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1] + %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1] + %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1] + %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1] + %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1] + %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1] + %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1] + %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1] + %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1] + %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1] + %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1] + %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1] + %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1] + %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1] + %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1] + %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1] + %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1] + %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1] + %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1] + %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1] + %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1] + %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1] + %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_cvt_pd2_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_cvt_pd2dq_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_cvt_ps2dq_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_cvtt_pd2dq_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_cvtt_ps2dq_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone + + +define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_dp_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_hadd_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_hadd_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_hsub_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_hsub_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) #0 { +; CHECK-LABEL: @test_x86_avx_ldu_dq_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint i8* [[A0:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to <32 x i8>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, <32 x i8>* [[TMP4]], align 1 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* [[A0]]) +; CHECK-NEXT: store <32 x i8> [[_MSLD]], <32 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <32 x i8>*), align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly + + +define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) #0 { +; CHECK-LABEL: @test_x86_avx_maskload_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.maskload.pd(i8* [[A0:%.*]], <2 x i64> [[MASK:%.*]]) +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly + + +define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) #0 { +; CHECK-LABEL: @test_x86_avx_maskload_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* [[A0:%.*]], <4 x i64> [[MASK:%.*]]) +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly + + +define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) #0 { +; CHECK-LABEL: @test_x86_avx_maskload_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.maskload.ps(i8* [[A0:%.*]], <4 x i32> [[MASK:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly + + +define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) #0 { +; CHECK-LABEL: @test_x86_avx_maskload_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* [[A0:%.*]], <8 x i32> [[MASK:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly + + +define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) #0 { +; CHECK-LABEL: @test_x86_avx_maskstore_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd(i8* [[A0:%.*]], <2 x i64> [[MASK:%.*]], <2 x double> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind + + +define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) #0 { +; CHECK-LABEL: @test_x86_avx_maskstore_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd.256(i8* [[A0:%.*]], <4 x i64> [[MASK:%.*]], <4 x double> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind + + +define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) #0 { +; CHECK-LABEL: @test_x86_avx_maskstore_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.x86.avx.maskstore.ps(i8* [[A0:%.*]], <4 x i32> [[MASK:%.*]], <4 x float> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind + + +define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) #0 { +; CHECK-LABEL: @test_x86_avx_maskstore_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.x86.avx.maskstore.ps.256(i8* [[A0:%.*]], <8 x i32> [[MASK:%.*]], <8 x float> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind + + +define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_max_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_max_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_min_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_min_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_movmsk_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_movmsk_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone + + +define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_ptestc_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_ptestnzc_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_ptestz_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_rcp_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i32> [[TMP1]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_round_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_round_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_rsqrt_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i32> [[TMP1]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone + +define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0:%.*]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone + + +define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone + +define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256_2( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> ) +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> ) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} + +define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_ps_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A2:%.*]] = load <4 x i32>, <4 x i32>* [[A1:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <4 x i32>* [[A1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <4 x i32>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A2]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a2 = load <4 x i32>, <4 x i32>* %a1 + %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone + + +define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestc_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestc_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestc_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestc_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestnzc_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestnzc_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestnzc_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestnzc_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestz_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestz_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestz_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestz_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define void @test_x86_avx_vzeroall() #0 { +; CHECK-LABEL: @test_x86_avx_vzeroall( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @llvm.x86.avx.vzeroall() +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.vzeroall() + ret void +} +declare void @llvm.x86.avx.vzeroall() nounwind + + +define void @test_x86_avx_vzeroupper() #0 { +; CHECK-LABEL: @test_x86_avx_vzeroupper( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @llvm.x86.avx.vzeroupper() +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.vzeroupper() + ret void +} +declare void @llvm.x86.avx.vzeroupper() nounwind + +define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind #0 { +; CHECK-LABEL: @movnt_dq( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> , <4 x i32> +; CHECK-NEXT: [[A3:%.*]] = shufflevector <2 x i64> [[A2]], <2 x i64> undef, <4 x i32> +; CHECK-NEXT: [[CAST:%.*]] = bitcast i8* [[P:%.*]] to <4 x i64>* +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <4 x i64>* [[CAST]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <4 x i64>* +; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], <4 x i64>* [[TMP7]], align 32 +; CHECK-NEXT: store <4 x i64> [[A3]], <4 x i64>* [[CAST]], align 32, !nontemporal !1 +; CHECK-NEXT: ret void +; + %a2 = add <2 x i64> %a1, + %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> + tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind + ret void +} +declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind + +define void @movnt_ps(i8* %p, <8 x float> %a) nounwind #0 { +; CHECK-LABEL: @movnt_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[CAST:%.*]] = bitcast i8* [[P:%.*]] to <8 x float>* +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <8 x float>* [[CAST]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* [[TMP7]], align 32 +; CHECK-NEXT: store <8 x float> [[A:%.*]], <8 x float>* [[CAST]], align 32, !nontemporal !1 +; CHECK-NEXT: ret void +; + tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind + ret void +} +declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind + +define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind #0 { + ; add operation forces the execution domain. +; CHECK-LABEL: @movnt_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A2:%.*]] = fadd <4 x double> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[CAST:%.*]] = bitcast i8* [[P:%.*]] to <4 x double>* +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <4 x double>* [[CAST]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <4 x i64>* +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], <4 x i64>* [[TMP7]], align 32 +; CHECK-NEXT: store <4 x double> [[A2]], <4 x double>* [[CAST]], align 32, !nontemporal !1 +; CHECK-NEXT: ret void +; + %a2 = fadd <4 x double> %a1, + tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind + ret void +} +declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind + + +define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_pclmulqdq( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], i8 0) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone + +attributes #0 = { sanitize_memory } diff --git a/llvm/test/Instrumentation/MemorySanitizer/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/avx2-intrinsics-x86.ll new file mode 100644 index 0000000000000..7c896c6ff0088 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/avx2-intrinsics-x86.ll @@ -0,0 +1,2427 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_packssdw( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP4]], <8 x i32> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_packssdw_fold() #0 { +; CHECK-LABEL: @test_x86_avx2_packssdw_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> ) +; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> ) + ret <16 x i16> %res +} + + +define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_packsswb( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP4]], <16 x i16> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], <32 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <32 x i8>*), align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone + + +define <32 x i8> @test_x86_avx2_packsswb_fold() #0 { +; CHECK-LABEL: @test_x86_avx2_packsswb_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> , <16 x i16> zeroinitializer) +; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], <32 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <32 x i8>*), align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> , <16 x i16> zeroinitializer) + ret <32 x i8> %res +} + + +define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_packuswb( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP4]], <16 x i16> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], <32 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <32 x i8>*), align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone + + +define <32 x i8> @test_x86_avx2_packuswb_fold() #0 { +; CHECK-LABEL: @test_x86_avx2_packuswb_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> , <16 x i16> zeroinitializer) +; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], <32 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <32 x i8>*), align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> , <16 x i16> zeroinitializer) + ret <32 x i8> %res +} + + +define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pavg_b( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <32 x i8>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <32 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <32 x i8> [[_MSPROP]], <32 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <32 x i8>*), align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pavg_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmadd_wd( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone + + +define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_pmovmskb( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <32 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i8> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmulh_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmulhu_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psad_bw( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <32 x i8>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <32 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[TMP7]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psll_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <8 x i32> [[TMP9]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psll_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <4 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <4 x i64> [[TMP9]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psll_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <16 x i16> [[TMP9]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_pslli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone + + +define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_pslli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_pslli_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone + + +define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psra_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <8 x i32> [[TMP9]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psra_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <16 x i16> [[TMP9]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_psrai_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_psrai_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone + + +define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrl_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <8 x i32> [[TMP9]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrl_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <4 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <4 x i64> [[TMP9]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrl_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <16 x i16> [[TMP9]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, <8 x i16>* %p) #0 { +; CHECK-LABEL: @test_x86_avx2_psrl_w_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to i64*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <8 x i16>* [[P]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <8 x i16>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[TMP9:%.*]] = trunc i128 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = sext i1 [[TMP10]] to i256 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i256 [[TMP11]] to <16 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[TMP2]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i16> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <16 x i16> [[TMP14]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %a1 = load <8 x i16>, <8 x i16>* %p + %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} + + +define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_psrli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone + + +define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_psrli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_psrli_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone + + +define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phadd_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phadd_sw( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phadd_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phsub_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phsub_sw( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phsub_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmadd_ub_sw( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <32 x i8>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <32 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[TMP6]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone + +define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(<32 x i8>* %ptr, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmadd_ub_sw_load_op0( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <32 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A0:%.*]] = load <32 x i8>, <32 x i8>* [[PTR:%.*]], align 32 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <32 x i8>* [[PTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <32 x i8>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, <32 x i8>* [[TMP7]], align 32 +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[TMP11]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %a0 = load <32 x i8>, <32 x i8>* %ptr + %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} + +define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmul_hr_sw( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone + + +define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pshuf_b( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <32 x i8>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <32 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <32 x i8> [[_MSPROP]], <32 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <32 x i8>*), align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone + + +define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psign_b( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <32 x i8>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <32 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <32 x i8> [[_MSPROP]], <32 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <32 x i8>*), align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psign_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psign_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_mpsadbw( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <32 x i8>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <32 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i8> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <16 x i16> zeroinitializer, <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone + +define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(<32 x i8>* %ptr, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_mpsadbw_load_op0( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <32 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A0:%.*]] = load <32 x i8>, <32 x i8>* [[PTR:%.*]], align 32 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <32 x i8>* [[PTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <32 x i8>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, <32 x i8>* [[TMP7]], align 32 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i8> [[_MSLD]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP11]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <16 x i16> zeroinitializer, <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %a0 = load <32 x i8>, <32 x i8>* %ptr + %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} + +define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_packusdw( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP4]], <8 x i32> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_packusdw_fold() #0 { +; CHECK-LABEL: @test_x86_avx2_packusdw_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> ) +; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> ) + ret <16 x i16> %res +} + + +define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) #0 { +; CHECK-LABEL: @test_x86_avx2_pblendvb( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <32 x i8>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <32 x i8>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, <32 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 64) to <32 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <32 x i8> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]], <32 x i8> [[A2:%.*]]) +; CHECK-NEXT: store <32 x i8> [[_MSPROP1]], <32 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <32 x i8>*), align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pblendw( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]], <16 x i32> +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], <16 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i16>*), align 8 +; CHECK-NEXT: ret <16 x i16> [[TMP3]] +; + %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone + + +define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pblendd_128( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pblendd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]], <8 x i32> +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; + %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_permd( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly + + +define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_permps( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly + + +define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_maskload_q( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* [[A0:%.*]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly + + +define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_maskload_q_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* [[A0:%.*]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly + + +define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_maskload_d( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* [[A0:%.*]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly + + +define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_maskload_d_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly + + +define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) #0 { +; CHECK-LABEL: @test_x86_avx2_maskstore_q( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.q(i8* [[A0:%.*]], <2 x i64> [[A1:%.*]], <2 x i64> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) + ret void +} +declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind + + +define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) #0 { +; CHECK-LABEL: @test_x86_avx2_maskstore_q_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.q.256(i8* [[A0:%.*]], <4 x i64> [[A1:%.*]], <4 x i64> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) + ret void +} +declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind + + +define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) #0 { +; CHECK-LABEL: @test_x86_avx2_maskstore_d( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.d(i8* [[A0:%.*]], <4 x i32> [[A1:%.*]], <4 x i32> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) + ret void +} +declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind + + +define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) #0 { +; CHECK-LABEL: @test_x86_avx2_maskstore_d_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.d.256(i8* [[A0:%.*]], <8 x i32> [[A1:%.*]], <8 x i32> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) + ret void +} +declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind + + +define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psllv_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} + +define <4 x i32> @test_x86_avx2_psllv_d_const() #0 { +; CHECK-LABEL: @test_x86_avx2_psllv_d_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES0:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES1:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[RES2:%.*]] = add <4 x i32> [[RES0]], [[RES1]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES2]] +; + %res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> ) + %res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> ) + %res2 = add <4 x i32> %res0, %res1 + ret <4 x i32> %res2 +} +declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psllv_d_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]]) +; CHECK-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} + +define <8 x i32> @test_x86_avx2_psllv_d_256_const() #0 { +; CHECK-LABEL: @test_x86_avx2_psllv_d_256_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> zeroinitializer, <8 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES0:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> zeroinitializer, <8 x i32> ) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> ) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES0]], [[RES1]] +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES2]] +; + %res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> ) + %res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> ) + %res2 = add <8 x i32> %res0, %res1 + ret <8 x i32> %res2 +} +declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psllv_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +define <2 x i64> @test_x86_avx2_psllv_q_const() #0 { +; CHECK-LABEL: @test_x86_avx2_psllv_q_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> zeroinitializer, <2 x i64> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> , <2 x i64> ) +; CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> , <2 x i64> ) + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psllv_q_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[TMP1]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1]]) +; CHECK-NEXT: store <4 x i64> [[TMP6]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} + +define <4 x i64> @test_x86_avx2_psllv_q_256_const() #0 { +; CHECK-LABEL: @test_x86_avx2_psllv_q_256_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> zeroinitializer, <4 x i64> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> , <4 x i64> ) +; CHECK-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> , <4 x i64> ) + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrlv_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} + +define <4 x i32> @test_x86_avx2_psrlv_d_const() #0 { +; CHECK-LABEL: @test_x86_avx2_psrlv_d_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES0:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[RES2:%.*]] = add <4 x i32> [[RES0]], [[RES1]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES2]] +; + %res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> ) + %res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> ) + %res2 = add <4 x i32> %res0, %res1 + ret <4 x i32> %res2 +} +declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrlv_d_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]]) +; CHECK-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} + +define <8 x i32> @test_x86_avx2_psrlv_d_256_const() #0 { +; CHECK-LABEL: @test_x86_avx2_psrlv_d_256_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> zeroinitializer, <8 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES0:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> zeroinitializer, <8 x i32> ) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> ) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES0]], [[RES1]] +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES2]] +; + %res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> ) + %res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> ) + %res2 = add <8 x i32> %res0, %res1 + ret <8 x i32> %res2 +} +declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrlv_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} + +define <2 x i64> @test_x86_avx2_psrlv_q_const() #0 { +; CHECK-LABEL: @test_x86_avx2_psrlv_q_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> zeroinitializer, <2 x i64> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> , <2 x i64> ) +; CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> , <2 x i64> ) + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrlv_q_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[TMP1]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1]]) +; CHECK-NEXT: store <4 x i64> [[TMP6]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} + + +define <4 x i64> @test_x86_avx2_psrlv_q_256_const() #0 { +; CHECK-LABEL: @test_x86_avx2_psrlv_q_256_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> zeroinitializer, <4 x i64> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> , <4 x i64> ) +; CHECK-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> , <4 x i64> ) + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrav_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} + +define <4 x i32> @test_x86_avx2_psrav_d_const() #0 { +; CHECK-LABEL: @test_x86_avx2_psrav_d_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> , <4 x i32> ) + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone + +define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrav_d_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]]) +; CHECK-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} + +define <8 x i32> @test_x86_avx2_psrav_d_256_const() #0 { +; CHECK-LABEL: @test_x86_avx2_psrav_d_256_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> zeroinitializer, <8 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> , <8 x i32> ) +; CHECK-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> , <8 x i32> ) + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone + +define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_d_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> [[A0:%.*]], i8* [[A1:%.*]], <4 x i32> [[IDX:%.*]], <2 x double> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, + i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ; + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, + <4 x i32>, <2 x double>, i8) nounwind readonly + +define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1, <4 x i32> %idx, <4 x double> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_d_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 56) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> [[A0:%.*]], i8* [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x double> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, + i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ; + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, + <4 x i32>, <4 x double>, i8) nounwind readonly + +define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1, <2 x i64> %idx, <2 x double> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_q_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> [[A0:%.*]], i8* [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x double> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, + i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ; + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, + <2 x i64>, <2 x double>, i8) nounwind readonly + +define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1, <4 x i64> %idx, <4 x double> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_q_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 72) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> [[A0:%.*]], i8* [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x double> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, + i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ; + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, + <4 x i64>, <4 x double>, i8) nounwind readonly + +define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1, <4 x i32> %idx, <4 x float> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_d_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> [[A0:%.*]], i8* [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, + i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, + <4 x i32>, <4 x float>, i8) nounwind readonly + +define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1, <8 x i32> %idx, <8 x float> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_d_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 72) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i32> [[TMP4]] to i256 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> [[A0:%.*]], i8* [[A1:%.*]], <8 x i32> [[IDX:%.*]], <8 x float> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, + i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ; + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, + <8 x i32>, <8 x float>, i8) nounwind readonly + +define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1, <2 x i64> %idx, <4 x float> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_q_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> [[A0:%.*]], i8* [[A1:%.*]], <2 x i64> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, + i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, + <2 x i64>, <4 x float>, i8) nounwind readonly + +define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1, <4 x i64> %idx, <4 x float> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_q_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 56) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> [[A0:%.*]], i8* [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, + i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, + <4 x i64>, <4 x float>, i8) nounwind readonly + +define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1, <4 x i32> %idx, <2 x i64> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_d_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> [[A0:%.*]], i8* [[A1:%.*]], <4 x i32> [[IDX:%.*]], <2 x i64> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, + i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, + <4 x i32>, <2 x i64>, i8) nounwind readonly + +define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1, <4 x i32> %idx, <4 x i64> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_d_q_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 56) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> [[A0:%.*]], i8* [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x i64> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, + i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ; + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, + <4 x i32>, <4 x i64>, i8) nounwind readonly + +define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1, <2 x i64> %idx, <2 x i64> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_q_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> [[A0:%.*]], i8* [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x i64> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, + i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, + <2 x i64>, <2 x i64>, i8) nounwind readonly + +define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1, <4 x i64> %idx, <4 x i64> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_q_q_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 72) to <4 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> [[A0:%.*]], i8* [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x i64> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, + i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ; + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, + <4 x i64>, <4 x i64>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1, <4 x i32> %idx, <4 x i32> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_d_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> [[A0:%.*]], i8* [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, + i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, + <4 x i32>, <4 x i32>, i8) nounwind readonly + +define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1, <8 x i32> %idx, <8 x i32> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_d_d_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 72) to <8 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i32> [[TMP4]] to i256 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> [[A0:%.*]], i8* [[A1:%.*]], <8 x i32> [[IDX:%.*]], <8 x i32> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, + i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ; + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, + <8 x i32>, <8 x i32>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1, <2 x i64> %idx, <4 x i32> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_q_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> [[A0:%.*]], i8* [[A1:%.*]], <2 x i64> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, + i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, + <2 x i64>, <4 x i32>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1, <4 x i64> %idx, <4 x i32> %mask) #0 { +; CHECK-LABEL: @test_x86_avx2_gather_q_d_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <4 x i64>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 56) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> [[A0:%.*]], i8* [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, + i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, + <4 x i64>, <4 x i32>, i8) nounwind readonly + +define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, <8 x i32> %idx, <8 x float> %mask, float* nocapture %out) #0 { +; CHECK-LABEL: @test_gather_mask( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to i64*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 72) to <8 x i32>*), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 104) to i64*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[A_I8:%.*]] = bitcast float* [[A:%.*]] to i8* +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP11]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i32> [[TMP4]] to i256 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP14]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> [[A0:%.*]], i8* [[A_I8]], <8 x i32> [[IDX:%.*]], <8 x float> [[MASK:%.*]], i8 4) +; CHECK-NEXT: [[OUT_PTR:%.*]] = bitcast float* [[OUT:%.*]] to <8 x float>* +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF0]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint <8 x float>* [[OUT_PTR]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = xor i64 [[TMP19]], 87960930222080 +; CHECK-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* [[TMP21]], align 4 +; CHECK-NEXT: store <8 x float> [[MASK]], <8 x float>* [[OUT_PTR]], align 4 +; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %a_i8 = bitcast float* %a to i8* + %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, + i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4) ; + + %out_ptr = bitcast float * %out to <8 x float> * + store <8 x float> %mask, <8 x float> * %out_ptr, align 4 + + ret <8 x float> %res +} + +define <2 x i64> @test_mask_demanded_bits(<2 x i64> %a0, i8* %a1, <2 x i64> %idx, <2 x i1> %mask) #0 { +; CHECK-LABEL: @test_mask_demanded_bits( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i1>, <2 x i1>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 40) to <2 x i1>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 24) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[MASK1:%.*]] = sext <2 x i1> [[MASK:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSPROP]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> [[A0:%.*]], i8* [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x i64> [[MASK1]], i8 2) +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %mask1 = sext <2 x i1> %mask to <2 x i64> + %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, + i8* %a1, <2 x i64> %idx, <2 x i64> %mask1, i8 2) ; + ret <2 x i64> %res +} + +attributes #0 = { sanitize_memory } diff --git a/llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll new file mode 100644 index 0000000000000..c3b98628649e7 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll @@ -0,0 +1,521 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_cmp_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_cmp_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone + + +define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comieq_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comige_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comige.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comigt_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comile_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comile.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comilt_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comineq_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_cvtss2si( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_cvttss2si( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone + + +define void @test_x86_sse_ldmxcsr(i8* %a0) #0 { +; CHECK-LABEL: @test_x86_sse_ldmxcsr( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint i8* [[A0:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to i32* +; CHECK-NEXT: [[_LDMXCSR:%.*]] = load i32, i32* [[TMP4]], align 1 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_LDMXCSR]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: call void @llvm.x86.sse.ldmxcsr(i8* [[A0]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.sse.ldmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind + + + +define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_max_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_max_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_min_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_min_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_movmsk_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone + + + +define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_rcp_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_rcp_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_rsqrt_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_rsqrt_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone + + +define void @test_x86_sse_stmxcsr(i8* %a0) #0 { +; CHECK-LABEL: @test_x86_sse_stmxcsr( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint i8* [[A0:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to i32* +; CHECK-NEXT: store i32 0, i32* [[TMP4]], align 4 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.sse.stmxcsr(i8* [[A0]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.sse.stmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.stmxcsr(i8*) nounwind + + +define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomieq_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomige_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomigt_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomile_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomilt_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomineq_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define void @sfence() nounwind { +; CHECK-LABEL: @sfence( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: tail call void @llvm.x86.sse.sfence() +; CHECK-NEXT: ret void +; + tail call void @llvm.x86.sse.sfence() + ret void +} +declare void @llvm.x86.sse.sfence() nounwind + +attributes #0 = { sanitize_memory } diff --git a/llvm/test/Instrumentation/MemorySanitizer/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/sse2-intrinsics-x86.ll new file mode 100644 index 0000000000000..ec03c7bf6a577 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/sse2-intrinsics-x86.ll @@ -0,0 +1,1381 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_cmp_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i1> [[TMP4]] to <2 x i64> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_cmp_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_comieq_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_comige_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_comigt_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_comile_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_comilt_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_comineq_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvtpd2dq( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone + + +define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 { +; CHECK-LABEL: @test_mm_cvtpd_epi32_zext( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[BC]] +; + %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) + %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> + %bc = bitcast <4 x i32> %res to <2 x i64> + ret <2 x i64> %bc +} + + +define <2 x i64> @test_mm_cvtpd_epi32_zext_load(<2 x double>* %p0) nounwind #0 { +; CHECK-LABEL: @test_mm_cvtpd_epi32_zext_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, <2 x double>* [[P0:%.*]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x double>* [[P0]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to <2 x i64>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]]) +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[BC]] +; + %a0 = load <2 x double>, <2 x double>* %p0 + %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) + %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> + %bc = bitcast <4 x i32> %res to <2 x i64> + ret <2 x i64> %bc +} + + +define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvtpd2ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone + +define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind #0 { +; CHECK-LABEL: @test_x86_sse2_cvtpd2ps_zext( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) + %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> + ret <4 x float> %res +} + +define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(<2 x double>* %p0) nounwind #0 { +; CHECK-LABEL: @test_x86_sse2_cvtpd2ps_zext_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, <2 x double>* [[P0:%.*]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x double>* [[P0]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to <2 x i64>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0]]) +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a0 = load <2 x double>, <2 x double>* %p0 + %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) + %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> + ret <4 x float> %res +} + +define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvtps2dq( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvtsd2si( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_cvtsd2ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %p1) #0 { +; CHECK-LABEL: @test_x86_sse2_cvtsd2ss_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1:%.*]] = load <2 x double>, <2 x double>* [[P1:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <2 x double>* [[P1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <2 x i64>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a1 = load <2 x double>, <2 x double>* %p1 + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} + + +define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, <2 x double>* %p1) optsize #0 { +; CHECK-LABEL: @test_x86_sse2_cvtsd2ss_load_optsize( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1:%.*]] = load <2 x double>, <2 x double>* [[P1:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <2 x double>* [[P1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <2 x i64>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a1 = load <2 x double>, <2 x double>* %p1 + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} + + +define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvttpd2dq( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone + + +define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind #0 { +; CHECK-LABEL: @test_mm_cvttpd_epi32_zext( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[BC]] +; + %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) + %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> + %bc = bitcast <4 x i32> %res to <2 x i64> + ret <2 x i64> %bc +} + + +define <2 x i64> @test_mm_cvttpd_epi32_zext_load(<2 x double>* %p0) nounwind #0 { +; CHECK-LABEL: @test_mm_cvttpd_epi32_zext_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, <2 x double>* [[P0:%.*]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x double>* [[P0]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to <2 x i64>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0]]) +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[BC]] +; + %a0 = load <2 x double>, <2 x double>* %p0 + %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) + %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> + %bc = bitcast <4 x i32> %res to <2 x i64> + ret <2 x i64> %bc +} + + +define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvttps2dq( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvttsd2si( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_max_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_max_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP3]], <2 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_min_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_min_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP3]], <2 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_movmsk_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_packssdw_128( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP4]], <4 x i32> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_packssdw_128_fold() #0 { +; CHECK-LABEL: @test_x86_sse2_packssdw_128_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> ) + ret <8 x i16> %res +} + + +define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_packsswb_128( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP4]], <8 x i16> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], <16 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i8>*), align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packsswb_128_fold() #0 { +; CHECK-LABEL: @test_x86_sse2_packsswb_128_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> , <8 x i16> zeroinitializer) +; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], <16 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i8>*), align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> , <8 x i16> zeroinitializer) + ret <16 x i8> %res +} + + +define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_packuswb_128( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP4]], <8 x i16> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], <16 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i8>*), align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packuswb_128_fold() #0 { +; CHECK-LABEL: @test_x86_sse2_packuswb_128_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> , <8 x i16> zeroinitializer) +; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], <16 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i8>*), align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> , <8 x i16> zeroinitializer) + ret <16 x i8> %res +} + + +define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_pavg_b( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i8>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <16 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], <16 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i8>*), align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_pavg_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_pmadd_wd( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone + + +define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_pmovmskb_128( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_pmulh_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_pmulhu_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psad_bw( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i8>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <16 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psll_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psll_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psll_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_pslli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_pslli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_pslli_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psra_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psra_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_psrai_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_psrai_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psrl_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psrl_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psrl_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, <8 x i16>* %p) #0 { +; CHECK-LABEL: @test_x86_sse2_psrl_w_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <8 x i16>* [[P]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <8 x i16>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[TMP9:%.*]] = trunc i128 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = sext i1 [[TMP10]] to i128 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i128 [[TMP11]] to <8 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[TMP2]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <8 x i16> [[TMP14]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %a1 = load <8 x i16>, <8 x i16>* %p + %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} + + +define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_psrli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_psrli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_psrli_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone + + +define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomieq_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomige_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomigt_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomile_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomilt_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomineq_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone + +define void @test_x86_sse2_pause() #0 { +; CHECK-LABEL: @test_x86_sse2_pause( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: tail call void @llvm.x86.sse2.pause() +; CHECK-NEXT: ret void +; + tail call void @llvm.x86.sse2.pause() + ret void +} +declare void @llvm.x86.sse2.pause() nounwind + +define void @lfence() nounwind #0 { +; CHECK-LABEL: @lfence( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: tail call void @llvm.x86.sse2.lfence() +; CHECK-NEXT: ret void +; + tail call void @llvm.x86.sse2.lfence() + ret void +} +declare void @llvm.x86.sse2.lfence() nounwind + +define void @mfence() nounwind #0 { +; CHECK-LABEL: @mfence( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: tail call void @llvm.x86.sse2.mfence() +; CHECK-NEXT: ret void +; + tail call void @llvm.x86.sse2.mfence() + ret void +} +declare void @llvm.x86.sse2.mfence() nounwind + +define void @clflush(i8* %p) nounwind #0 { +; CHECK-LABEL: @clflush( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: tail call void @llvm.x86.sse2.clflush(i8* [[P:%.*]]) +; CHECK-NEXT: ret void +; + tail call void @llvm.x86.sse2.clflush(i8* %p) + ret void +} +declare void @llvm.x86.sse2.clflush(i8*) nounwind + +attributes #0 = { sanitize_memory } diff --git a/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll new file mode 100644 index 0000000000000..53f902ee695b7 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll @@ -0,0 +1,463 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { +; CHECK-LABEL: @test_x86_sse41_blendvpd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { +; CHECK-LABEL: @test_x86_sse41_blendvps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_dppd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_dpps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_insertps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 17) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone + + + +define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_mpsadbw( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i8>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <16 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone + +define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(<16 x i8>* %ptr, <16 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_mpsadbw_load_op0( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @__msan_param_tls, i32 0, i32 0), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 8) to <16 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A0:%.*]] = load <16 x i8>, <16 x i8>* [[PTR:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <16 x i8>* [[PTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <16 x i8>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> [[A0]], <16 x i8> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %a0 = load <16 x i8>, <16 x i8>* %ptr + %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} + +define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_packusdw( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP4]], <4 x i32> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_packusdw_fold() #0 { +; CHECK-LABEL: @test_x86_sse41_packusdw_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> ) + ret <8 x i16> %res +} + + +define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) #0 { +; CHECK-LABEL: @test_x86_sse41_pblendvb( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([100 x i64]* @__msan_param_tls to <16 x i8>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <16 x i8>*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <16 x i8>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]], <16 x i8> [[A2:%.*]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], <16 x i8>* bitcast ([100 x i64]* @__msan_retval_tls to <16 x i8>*), align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_sse41_phminposuw( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i16>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i16> [[TMP1]], <8 x i16>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i16>*), align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone + + +define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_ptestc( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestc(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone + + +define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_ptestnzc( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone + + +define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_ptestz( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestz(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse41_round_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse41_round_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_round_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 7) +; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double>* %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_round_sd_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1B:%.*]] = load <2 x double>, <2 x double>* [[A1:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <2 x double>* [[A1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <2 x i64>* +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[_MSLD]], <2 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1B]], i32 7) +; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %a1b = load <2 x double>, <2 x double>* %a1 + %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} + + +define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_round_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i32 7) +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone + +attributes #0 = { sanitize_memory }