diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll index a43d9400cde6c..39b2b6225d8b1 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll @@ -1,42 +1,37 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefixes=ALL,X86 -; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+avx | FileCheck %s --check-prefixes=ALL,X86 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefixes=ALL,X64 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+avx | FileCheck %s --check-prefixes=ALL,X64 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test1: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phaddw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test1: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: phaddw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test1(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test1( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -51,37 +46,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone -define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test88: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpgtd {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test88: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pcmpgtd %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test88(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test88( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -96,37 +86,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test87: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpgtw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test87: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pcmpgtw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test87(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test87( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -141,37 +126,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test86: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpgtb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test86: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pcmpgtb %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test86(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test86( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -186,37 +166,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone -define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test85: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpeqd {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test85: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pcmpeqd %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test85(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test85( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -231,37 +206,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test84: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpeqw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test84: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pcmpeqw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test84(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test84( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -276,37 +246,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test83: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pcmpeqb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test83: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pcmpeqb %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test83(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test83( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -321,37 +286,32 @@ entry: declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone -define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test82: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpckldq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0] -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test82: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test82(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test82( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -366,37 +326,32 @@ entry: declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone -define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test81: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpcklwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test81: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test81(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test81( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -411,37 +366,32 @@ entry: declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone -define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test80: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpcklbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test80: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test80(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test80( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -456,37 +406,32 @@ entry: declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone -define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test79: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpckhdq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[1],mem[1] -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test79: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test79(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test79( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -501,37 +446,32 @@ entry: declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone -define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test78: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpckhwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test78: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test78(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test78( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -546,37 +486,32 @@ entry: declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone -define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test77: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: punpckhbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test77: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test77(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test77( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -591,37 +526,41 @@ entry: declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone -define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test76: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: packuswb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test76: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: packuswb %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test76(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test76( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP23]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to x86_mmx +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to x86_mmx +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx [[TMP14]], x86_mmx [[TMP15]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[_MSPROP_VECTOR_PACK]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -636,37 +575,41 @@ entry: declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone -define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test75: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: packssdw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test75: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: packssdw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test75(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test75( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <2 x i32> [[TMP20]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <2 x i1> [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP11]] to x86_mmx +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i32> [[TMP13]] to x86_mmx +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx [[TMP14]], x86_mmx [[TMP15]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[_MSPROP_VECTOR_PACK]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -681,37 +624,41 @@ entry: declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone -define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test74: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: packsswb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test74: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: packsswb %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test74(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test74( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP23]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to x86_mmx +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to x86_mmx +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx [[TMP14]], x86_mmx [[TMP15]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[_MSPROP_VECTOR_PACK]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -726,32 +673,30 @@ entry: declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone -define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test73: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psrad $3, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test73: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: psrad $3, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test73(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test73( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -764,32 +709,30 @@ entry: declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone -define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test72: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psraw $3, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test72: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: psraw $3, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test72(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test72( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -800,29 +743,30 @@ entry: ret i64 %4 } -define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test72_2: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test72_2: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: retq +define i64 @test72_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test72_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[TMP10]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[MMX_VAR_I]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -835,28 +779,24 @@ entry: declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone -define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test71: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movq 8(%ebp), %mm0 -; X86-NEXT: psrlq $3, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test71: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: psrlq $3, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test71(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test71( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx [[TMP6]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to i64 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP4]] +; entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -867,32 +807,30 @@ entry: declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone -define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test70: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psrld $3, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test70: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: psrld $3, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test70(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test70( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -903,29 +841,30 @@ entry: ret i64 %4 } -define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test70_2: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test70_2: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: retq +define i64 @test70_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test70_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[TMP10]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[MMX_VAR_I]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -938,32 +877,30 @@ entry: declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone -define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test69: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psrlw $3, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test69: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: psrlw $3, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test69(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test69( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -976,28 +913,24 @@ entry: declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone -define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test68: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movq 8(%ebp), %mm0 -; X86-NEXT: psllq $3, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test68: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: psllq $3, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test68(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test68( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx [[TMP6]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to i64 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP4]] +; entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -1008,32 +941,30 @@ entry: declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone -define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test67: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pslld $3, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test67: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: pslld $3, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test67(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test67( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -1046,32 +977,30 @@ entry: declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone -define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test66: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psllw $3, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test66: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: psllw $3, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test66(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test66( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -1082,29 +1011,30 @@ entry: ret i64 %4 } -define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test66_2: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test66_2: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: retq +define i64 @test66_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test66_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[TMP10]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[MMX_VAR_I]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -1117,33 +1047,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone -define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test65: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psrad 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test65: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psrad %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test65(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test65( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -1158,33 +1091,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test64: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psraw 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test64: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psraw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test64( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -1199,29 +1135,30 @@ entry: declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone -define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test63: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movq 8(%ebp), %mm0 -; X86-NEXT: psrlq 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test63: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psrlq %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test63(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test63( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[_MSPROP1]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx [[TMP6]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to i64 +; CHECK-NEXT: store i64 [[TMP11]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -1234,33 +1171,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone -define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test62: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psrld 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test62: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psrld %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test62(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test62( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -1275,33 +1215,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test61: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psrlw 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test61: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psrlw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test61(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test61( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -1316,29 +1259,30 @@ entry: declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone -define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test60: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movq 8(%ebp), %mm0 -; X86-NEXT: psllq 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test60: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psllq %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test60(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test60( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[_MSPROP1]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx [[TMP6]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to i64 +; CHECK-NEXT: store i64 [[TMP11]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -1351,33 +1295,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone -define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test59: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pslld 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test59: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pslld %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test59(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test59( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -1392,33 +1339,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test58: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psllw 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test58: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psllw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test58(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test58( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -1433,37 +1383,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone -define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test56: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pxor {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test56: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pxor %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test56(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test56( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1478,37 +1423,32 @@ entry: declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone -define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test55: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: por {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test55: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: por %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test55(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test55( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1523,37 +1463,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone -define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test54: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pandn {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test54: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pandn %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test54(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test54( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1568,37 +1503,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone -define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test53: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pand {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test53: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pand %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test53(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test53( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1613,37 +1543,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test52: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmullw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test52: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pmullw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test52(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test52( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1656,37 +1581,32 @@ entry: ret i64 %5 } -define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test51: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmullw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test51: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pmullw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test51(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test51( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1701,37 +1621,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test50: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmulhw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test50: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pmulhw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test50(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test50( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1746,37 +1661,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone -define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test49: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmaddwd {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test49: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pmaddwd %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test49( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP13:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP18]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1791,37 +1705,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test48: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubusw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test48: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psubusw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test48(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test48( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1836,37 +1745,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test47: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubusb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test47: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psubusb %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test47(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test47( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -1881,37 +1785,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test46: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubsw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test46: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psubsw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test46(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test46( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1926,37 +1825,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test45: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubsb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test45: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psubsb %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test45(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test45( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -1969,29 +1863,25 @@ entry: ret i64 %5 } -define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test44: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movq 8(%ebp), %mm0 -; X86-NEXT: psubq 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test44: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psubq %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test44(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test44( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx [[MMX_VAR]], x86_mmx [[MMX_VAR1]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var = bitcast i64 %0 to x86_mmx @@ -2006,37 +1896,32 @@ declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone -define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test43: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubd {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test43: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psubd %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test43(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test43( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -2051,37 +1936,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test42: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test42: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psubw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test42(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test42( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -2096,37 +1976,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test41: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psubb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test41: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psubb %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test41(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test41( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -2141,37 +2016,32 @@ entry: declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test40: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddusw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test40: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: paddusw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test40(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test40( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -2186,37 +2056,32 @@ entry: declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test39: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddusb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test39: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: paddusb %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test39(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test39( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -2231,37 +2096,32 @@ entry: declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test38: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddsw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test38: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: paddsw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test38(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test38( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -2276,37 +2136,32 @@ entry: declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test37: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddsb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test37: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: paddsb %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test37(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test37( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -2321,29 +2176,25 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone -define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test36: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movq 8(%ebp), %mm0 -; X86-NEXT: paddq 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test36: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: paddq %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test36(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test36( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx [[MMX_VAR]], x86_mmx [[MMX_VAR1]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var = bitcast i64 %0 to x86_mmx @@ -2356,37 +2207,32 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone -define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test35: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddd {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test35: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: paddd %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test35(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test35( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -2401,37 +2247,32 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test34: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test34: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: paddw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test34(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test34( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -2446,37 +2287,32 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test33: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: paddb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test33: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: paddb %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test33(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test33( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -2491,37 +2327,30 @@ entry: declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone -define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test32: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psadbw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test32: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psadbw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test32(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test32( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP12:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP12]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP4]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP13]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 48 +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[TMP11]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -2534,37 +2363,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test31: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pminsw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test31: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pminsw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test31(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test31( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -2579,37 +2403,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test30: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pminub {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test30: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pminub %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test30(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test30( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -2624,37 +2443,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test29: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmaxsw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test29: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pmaxsw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test29(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test29( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -2669,37 +2483,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test28: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmaxub {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test28: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pmaxub %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test28(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test28( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -2714,37 +2523,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test27: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pavgw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test27: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pavgw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test27(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test27( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -2759,37 +2563,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test26: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pavgb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test26: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pavgb %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test26(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test26( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -2804,19 +2603,27 @@ entry: declare void @llvm.x86.mmx.movnt.dq(ptr, x86_mmx) nounwind -define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp { -; X86-LABEL: test25: -; X86: # %bb.0: # %entry -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movntq %mm0, (%eax) -; X86-NEXT: retl +define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp #0 { +; CHECK-LABEL: define void @test25( +; CHECK-SAME: ptr [[P:%.*]], <1 x i64> [[A:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: tail call void @llvm.x86.mmx.movnt.dq(ptr [[P]], x86_mmx [[MMX_VAR_I]]) #[[ATTR2]] +; CHECK-NEXT: ret void ; -; X64-LABEL: test25: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rsi, %mm0 -; X64-NEXT: movntq %mm0, (%rdi) -; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -2826,28 +2633,26 @@ entry: declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone -define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test24: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: movq (%esp), %mm0 -; X86-NEXT: pmovmskb %mm0, %eax -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test24: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: pmovmskb %mm0, %eax -; X64-NEXT: retq +define i32 @test24(<1 x i64> %a) #0 { +; CHECK-LABEL: define i32 @test24( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP6]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx [[MMX_VAR_I]]) #[[ATTR2]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP1]] +; entry: %0 = bitcast <1 x i64> %a to <8 x i8> %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx @@ -2857,38 +2662,35 @@ entry: declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr) nounwind -define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp { -; X86-LABEL: test23: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: pushl %edi -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: movl 24(%ebp), %edi -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq (%esp), %mm1 -; X86-NEXT: maskmovq %mm0, %mm1 -; X86-NEXT: leal -4(%ebp), %esp -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test23: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: maskmovq %mm1, %mm0 -; X64-NEXT: retq +define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp #0 { +; CHECK-LABEL: define void @test23( +; CHECK-SAME: <1 x i64> [[D:%.*]], <1 x i64> [[N:%.*]], ptr [[P:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[N]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[D]] to <8 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP5]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.x86.mmx.maskmovq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]], ptr [[P]]) #[[ATTR2]] +; CHECK-NEXT: ret void +; entry: %0 = bitcast <1 x i64> %n to <8 x i8> %1 = bitcast <1 x i64> %d to <8 x i8> @@ -2900,37 +2702,32 @@ entry: declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test22: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmulhuw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test22: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pmulhuw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test22(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test22( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -2945,31 +2742,29 @@ entry: declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone -define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test21: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: pshufw $3, {{[0-9]+}}(%esp), %mm0 # mm0 = mem[3,0,0,0] -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test21: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: pshufw $3, %mm0, %mm0 # mm0 = mm0[3,0,0,0] -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test21(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test21( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx [[TMP1]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; entry: %0 = bitcast <1 x i64> %a to <4 x i16> %1 = bitcast <4 x i16> %0 to x86_mmx @@ -2980,29 +2775,29 @@ entry: ret i64 %5 } -define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test21_2: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: pshufw $3, (%esp), %mm0 # mm0 = mem[3,0,0,0] -; X86-NEXT: movd %mm0, %eax -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test21_2: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: pshufw $3, %mm0, %mm0 # mm0 = mm0[3,0,0,0] -; X64-NEXT: movd %mm0, %eax -; X64-NEXT: retq +define i32 @test21_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i32 @test21_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx [[TMP1]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP5]] +; entry: %0 = bitcast <1 x i64> %a to <4 x i16> %1 = bitcast <4 x i16> %0 to x86_mmx @@ -3015,37 +2810,27 @@ entry: declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone -define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test20: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmuludq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test20: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pmuludq %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test20(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test20( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -3058,27 +2843,26 @@ entry: declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone -define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test19: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: cvtpi2pd (%esp), %xmm0 -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test19: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: cvtpi2pd %mm0, %xmm0 -; X64-NEXT: retq +define <2 x double> @test19(<1 x i64> %a) #0 { +; CHECK-LABEL: define <2 x double> @test19( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP7]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; entry: %0 = bitcast <1 x i64> %a to <2 x i32> %1 = bitcast <2 x i32> %0 to x86_mmx @@ -3088,26 +2872,26 @@ entry: declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone -define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp { -; X86-LABEL: test18: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: cvttpd2pi %xmm0, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test18: -; X64: # %bb.0: # %entry -; X64-NEXT: cvttpd2pi %xmm0, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test18(<2 x double> %a) #0 { +; CHECK-LABEL: define i64 @test18( +; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP0:%.*]] = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> [[A]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; entry: %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone %1 = bitcast x86_mmx %0 to <2 x i32> @@ -3118,26 +2902,26 @@ entry: declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone -define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp { -; X86-LABEL: test17: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: cvtpd2pi %xmm0, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test17: -; X64: # %bb.0: # %entry -; X64-NEXT: cvtpd2pi %xmm0, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test17(<2 x double> %a) #0 { +; CHECK-LABEL: define i64 @test17( +; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP0:%.*]] = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> [[A]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; entry: %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone %1 = bitcast x86_mmx %0 to <2 x i32> @@ -3148,29 +2932,32 @@ entry: declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone -define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test16: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movq 8(%ebp), %mm0 -; X86-NEXT: palignr $16, 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test16: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: palignr $16, %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test16(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test16( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[_MSPROP1]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx [[MMX_VAR]], x86_mmx [[MMX_VAR1]], i8 16) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var = bitcast i64 %0 to x86_mmx @@ -3183,31 +2970,26 @@ entry: declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone -define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test15: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: pabsd {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test15: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: pabsd %mm0, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test15(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test15( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; entry: %0 = bitcast <1 x i64> %a to <2 x i32> %1 = bitcast <2 x i32> %0 to x86_mmx @@ -3220,31 +3002,26 @@ entry: declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone -define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test14: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: pabsw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test14: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: pabsw %mm0, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test14(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test14( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; entry: %0 = bitcast <1 x i64> %a to <4 x i16> %1 = bitcast <4 x i16> %0 to x86_mmx @@ -3257,31 +3034,26 @@ entry: declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone -define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp { -; X86-LABEL: test13: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: pabsb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test13: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: pabsb %mm0, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test13(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test13( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP11]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; entry: %0 = bitcast <1 x i64> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to x86_mmx @@ -3294,37 +3066,32 @@ entry: declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone -define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test12: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psignd {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test12: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psignd %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test12(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test12( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -3339,37 +3106,32 @@ entry: declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test11: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psignw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test11: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psignw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test11(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test11( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -3384,37 +3146,32 @@ entry: declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test10: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: psignb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test10: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: psignb %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test10(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test10( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -3429,37 +3186,32 @@ entry: declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone -define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test9: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pshufb {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test9: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pshufb %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test9( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -3474,37 +3226,32 @@ entry: declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone -define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test8: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmulhrsw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test8: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pmulhrsw %mm0, %mm1 -; X64-NEXT: movq %mm1, %rax -; X64-NEXT: retq +define i64 @test8(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test8( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -3519,37 +3266,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone -define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test7: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pmaddubsw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test7: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: pmaddubsw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test7( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP18]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP21]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[TMP14]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP20]] +; entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -3564,37 +3310,32 @@ entry: declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone -define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test6: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phsubsw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test6: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: phsubsw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test6( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -3609,37 +3350,32 @@ entry: declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone -define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test5: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phsubd {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test5: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: phsubd %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test5( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -3654,37 +3390,32 @@ entry: declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone -define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test4: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phsubw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test4: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: phsubw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test4( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -3699,37 +3430,32 @@ entry: declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone -define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test3: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phaddsw {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test3: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: phaddsw %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test3(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test3( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -3744,37 +3470,32 @@ entry: declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone -define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; X86-LABEL: test2: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: phaddd {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test2: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: movq %rsi, %mm1 -; X64-NEXT: phaddd %mm1, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define i64 @test2(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test2( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -3787,54 +3508,74 @@ entry: ret i64 %7 } -define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind { +define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind #0 { ; ALL-LABEL: test89: ; ALL: # %bb.0: ; ALL-NEXT: cvtpi2ps %mm0, %xmm0 ; ALL-NEXT: ret{{[l|q]}} +; CHECK-LABEL: define <4 x float> @test89( +; CHECK-SAME: <4 x float> [[A:%.*]], x86_mmx [[B:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[C:%.*]] = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> [[A]], x86_mmx [[B]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[C]] +; %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b) ret <4 x float> %c } declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone -define void @test90() { +define void @test90() #0 { ; ALL-LABEL: test90: ; ALL: # %bb.0: ; ALL-NEXT: emms ; ALL-NEXT: ret{{[l|q]}} +; CHECK-LABEL: define void @test90( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @llvm.x86.mmx.emms() +; CHECK-NEXT: ret void +; call void @llvm.x86.mmx.emms() ret void } declare void @llvm.x86.mmx.emms() -define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind { -; X86-LABEL: test_mm_insert_pi16: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; X86-NEXT: pinsrw $2, 16(%ebp), %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_mm_insert_pi16: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: pinsrw $2, %esi, %mm0 -; X64-NEXT: movq %mm0, %rax -; X64-NEXT: retq +define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind #0 { +; CHECK-LABEL: define <1 x i64> @test_mm_insert_pi16( +; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]], i32 [[D:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP3:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP3]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A_COERCE]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP1:%.*]] = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx [[TMP0]], i32 [[D]], i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to <1 x i64> +; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP2]] +; entry: %0 = bitcast <1 x i64> %a.coerce to x86_mmx %1 = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %0, i32 %d, i32 2) @@ -3844,28 +3585,24 @@ entry: declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg) -define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind { -; X86-LABEL: test_mm_extract_pi16: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: movq (%esp), %mm0 -; X86-NEXT: pextrw $2, %mm0, %eax -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_mm_extract_pi16: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdi, %mm0 -; X64-NEXT: pextrw $2, %mm0, %eax -; X64-NEXT: retq +define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind #0 { +; CHECK-LABEL: define i32 @test_mm_extract_pi16( +; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A_COERCE]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx [[TMP0]], i32 2) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP1]] +; entry: %0 = bitcast <1 x i64> %a.coerce to x86_mmx %1 = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx %0, i32 2) @@ -3873,3 +3610,8 @@ entry: } declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32 immarg) + +attributes #0 = { sanitize_memory } +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575} +;.