diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index cf2b32c74eb5a5..355f5a217a9af6 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2252,19 +2252,19 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
           DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
         GV->setAlignment(PrefAlign);
     }
-    // If this is a memcpy (or similar) then we may be able to improve the
-    // alignment
-    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
-      Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
-      MaybeAlign MIDestAlign = MI->getDestAlign();
-      if (!MIDestAlign || DestAlign > *MIDestAlign)
-        MI->setDestAlignment(DestAlign);
-      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
-        MaybeAlign MTISrcAlign = MTI->getSourceAlign();
-        Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
-        if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
-          MTI->setSourceAlignment(SrcAlign);
-      }
+  }
+  // If this is a memcpy (or similar) then we may be able to improve the
+  // alignment.
+  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+    Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
+    MaybeAlign MIDestAlign = MI->getDestAlign();
+    if (!MIDestAlign || DestAlign > *MIDestAlign)
+      MI->setDestAlignment(DestAlign);
+    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+      MaybeAlign MTISrcAlign = MTI->getSourceAlign();
+      Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
+      if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
+        MTI->setSourceAlignment(SrcAlign);
     }
   }
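The functional change: the mem-intrinsic alignment improvement moves out of the `TLI->shouldAlignPointerArgs()` block, so it now runs for every call on every target rather than only where that hook returns true. A minimal IR sketch of the effect; the function below is hypothetical, not a test from this patch:

    ; With the hoisted code, CodeGenPrepare can raise both alignments on
    ; this memcpy from 1 to 16 on any target, because getKnownAlignment()
    ; sees through to the allocas.
    define void @align_sketch() {
      %src = alloca [32 x i8], align 16
      %dst = alloca [32 x i8], align 16
      call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 32, i1 false)
      ret void
    }
    declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

The AMDGPU and X86 test updates below follow from that: better-known alignments let the backends merge or widen the loads and stores emitted for mem intrinsics.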
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
index 77c2205018f95e..ee1bc4376a1c99 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
@@ -19,16 +19,16 @@ define protected amdgpu_kernel void @test(i8 addrspace(1)* nocapture %ptr.coerce
 ; GCN-LABEL: test:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    ds_read_u8 v1, v0 offset:1
+; GCN-NEXT:    v_mov_b32_e32 v1, 2
+; GCN-NEXT:    ds_write_b8 v0, v1
 ; GCN-NEXT:    ds_read_u8 v2, v0 offset:2
+; GCN-NEXT:    ds_read_u16 v3, v0
 ; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GCN-NEXT:    v_mov_b32_e32 v3, 2
-; GCN-NEXT:    ds_write_b8 v0, v3
-; GCN-NEXT:    ds_write_b8 v0, v3 offset:4
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    ds_write_b8 v0, v1 offset:5
 ; GCN-NEXT:    ds_write_b8 v0, v2 offset:6
-; GCN-NEXT:    v_mov_b32_e32 v1, 1
+; GCN-NEXT:    ds_write_b16 v0, v3 offset:4
+; GCN-NEXT:    v_cmp_eq_u16_sdwa s[2:3], v3, v1 src0_sel:BYTE_0 src1_sel:DWORD
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[2:3]
 ; GCN-NEXT:    global_store_byte v0, v1, s[0:1]
 ; GCN-NEXT:    s_endpgm
 ; CHECK-LABEL: @test(
diff --git a/llvm/test/CodeGen/X86/mcu-abi.ll b/llvm/test/CodeGen/X86/mcu-abi.ll
index fbec4dc813c2fd..53c228943d9148 100644
--- a/llvm/test/CodeGen/X86/mcu-abi.ll
+++ b/llvm/test/CodeGen/X86/mcu-abi.ll
@@ -64,13 +64,14 @@ entry:
 define void @ret_large_struct(ptr noalias nocapture sret(%struct.st12_t) %agg.result, ptr byval(%struct.st12_t) nocapture readonly align 4 %r) #0 {
 ; CHECK-LABEL: ret_large_struct:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    movl $48, %ecx
-; CHECK-NEXT:    calll memcpy
-; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT:    movl $12, %ecx
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    rep;movsl (%esi), %es:(%edi)
 ; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
 ; CHECK-NEXT:    retl
 entry:
   call void @llvm.memcpy.p0.p0.i32(ptr %agg.result, ptr %r, i32 48, i1 false)
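The mcu-abi.ll change shows the knock-on effect on X86: with both operands of the 48-byte memcpy now known to be 4-byte aligned (provable from the `byval ... align 4` argument and the `sret` pointer), the backend can expand the copy inline as twelve dword moves (`movl $12, %ecx` + `rep;movsl`) instead of emitting `calll memcpy`. A sketch of the post-CodeGenPrepare IR under that assumption; the struct layout and function name here are hypothetical:

    %struct.st12_t = type { [12 x i32] }    ; assumed 48-byte layout

    ; Both pointers carry a known 'align 4', so SelectionDAG can lower
    ; the 48-byte copy as 'rep;movsl' over 12 dwords rather than a libcall.
    define void @ret_large_struct_sketch(ptr noalias sret(%struct.st12_t) %agg.result,
                                         ptr byval(%struct.st12_t) align 4 %r) {
      call void @llvm.memcpy.p0.p0.i32(ptr align 4 %agg.result, ptr align 4 %r, i32 48, i1 false)
      ret void
    }
    declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)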
diff --git a/llvm/test/CodeGen/X86/memset-2.ll b/llvm/test/CodeGen/X86/memset-2.ll
index 72ef36fc82733c..a4fbbc57004708 100644
--- a/llvm/test/CodeGen/X86/memset-2.ll
+++ b/llvm/test/CodeGen/X86/memset-2.ll
@@ -1,31 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=i386-apple-darwin9 -mcpu=yonah < %s | FileCheck %s
 
-define fastcc void @t1() nounwind {
+define fastcc void @t1(ptr nocapture %s) nounwind {
 ; CHECK-LABEL: t1:
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    subl $16, %esp
 ; CHECK-NEXT:    pushl $188
 ; CHECK-NEXT:    pushl $0
-; CHECK-NEXT:    pushl $0
+; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    calll _memset
 ; CHECK-NEXT:    addl $16, %esp
 ; CHECK-NEXT:    ud2
 entry:
-  call void @llvm.memset.p0.i32(ptr null, i8 0, i32 188, i1 false)
+  call void @llvm.memset.p0.i32(ptr %s, i8 0, i32 188, i1 false)
   unreachable
 }
 
-define fastcc void @t2(i8 signext %c) nounwind {
+define fastcc void @t2(ptr nocapture %s, i8 signext %c) nounwind {
 ; CHECK-LABEL: t2:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    subl $12, %esp
-; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    movl $76, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    subl $16, %esp
+; CHECK-NEXT:    pushl $76
+; CHECK-NEXT:    pushl %edx
+; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    calll _memset
+; CHECK-NEXT:    addl $16, %esp
 ; CHECK-NEXT:    ud2
 entry:
-  call void @llvm.memset.p0.i32(ptr undef, i8 %c, i32 76, i1 false)
+  call void @llvm.memset.p0.i32(ptr %s, i8 %c, i32 76, i1 false)
   unreachable
 }
diff --git a/llvm/test/CodeGen/X86/memset64-on-x86-32.ll b/llvm/test/CodeGen/X86/memset64-on-x86-32.ll
index 7b939dfb425967..c6eecdcdf99cc5 100644
--- a/llvm/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/llvm/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -3,55 +3,57 @@
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=SLOW_32
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=SLOW_64
 
-define void @bork() nounwind {
+define void @bork(ptr nocapture align 4 %dst) nounwind {
 ; FAST-LABEL: bork:
 ; FAST:       # %bb.0:
+; FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; FAST-NEXT:    xorps %xmm0, %xmm0
-; FAST-NEXT:    movups %xmm0, 64
-; FAST-NEXT:    movups %xmm0, 48
-; FAST-NEXT:    movups %xmm0, 32
-; FAST-NEXT:    movups %xmm0, 16
-; FAST-NEXT:    movups %xmm0, 0
+; FAST-NEXT:    movups %xmm0, 64(%eax)
+; FAST-NEXT:    movups %xmm0, 48(%eax)
+; FAST-NEXT:    movups %xmm0, 32(%eax)
+; FAST-NEXT:    movups %xmm0, 16(%eax)
+; FAST-NEXT:    movups %xmm0, (%eax)
 ; FAST-NEXT:    retl
 ;
 ; SLOW_32-LABEL: bork:
 ; SLOW_32:       # %bb.0:
-; SLOW_32-NEXT:    movl $0, 4
-; SLOW_32-NEXT:    movl $0, 0
-; SLOW_32-NEXT:    movl $0, 12
-; SLOW_32-NEXT:    movl $0, 8
-; SLOW_32-NEXT:    movl $0, 20
-; SLOW_32-NEXT:    movl $0, 16
-; SLOW_32-NEXT:    movl $0, 28
-; SLOW_32-NEXT:    movl $0, 24
-; SLOW_32-NEXT:    movl $0, 36
-; SLOW_32-NEXT:    movl $0, 32
-; SLOW_32-NEXT:    movl $0, 44
-; SLOW_32-NEXT:    movl $0, 40
-; SLOW_32-NEXT:    movl $0, 52
-; SLOW_32-NEXT:    movl $0, 48
-; SLOW_32-NEXT:    movl $0, 60
-; SLOW_32-NEXT:    movl $0, 56
-; SLOW_32-NEXT:    movl $0, 68
-; SLOW_32-NEXT:    movl $0, 64
-; SLOW_32-NEXT:    movl $0, 76
-; SLOW_32-NEXT:    movl $0, 72
+; SLOW_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SLOW_32-NEXT:    movl $0, 4(%eax)
+; SLOW_32-NEXT:    movl $0, (%eax)
+; SLOW_32-NEXT:    movl $0, 12(%eax)
+; SLOW_32-NEXT:    movl $0, 8(%eax)
+; SLOW_32-NEXT:    movl $0, 20(%eax)
+; SLOW_32-NEXT:    movl $0, 16(%eax)
+; SLOW_32-NEXT:    movl $0, 28(%eax)
+; SLOW_32-NEXT:    movl $0, 24(%eax)
+; SLOW_32-NEXT:    movl $0, 36(%eax)
+; SLOW_32-NEXT:    movl $0, 32(%eax)
+; SLOW_32-NEXT:    movl $0, 44(%eax)
+; SLOW_32-NEXT:    movl $0, 40(%eax)
+; SLOW_32-NEXT:    movl $0, 52(%eax)
+; SLOW_32-NEXT:    movl $0, 48(%eax)
+; SLOW_32-NEXT:    movl $0, 60(%eax)
+; SLOW_32-NEXT:    movl $0, 56(%eax)
+; SLOW_32-NEXT:    movl $0, 68(%eax)
+; SLOW_32-NEXT:    movl $0, 64(%eax)
+; SLOW_32-NEXT:    movl $0, 76(%eax)
+; SLOW_32-NEXT:    movl $0, 72(%eax)
 ; SLOW_32-NEXT:    retl
 ;
 ; SLOW_64-LABEL: bork:
 ; SLOW_64:       # %bb.0:
-; SLOW_64-NEXT:    movq $0, 72
-; SLOW_64-NEXT:    movq $0, 64
-; SLOW_64-NEXT:    movq $0, 56
-; SLOW_64-NEXT:    movq $0, 48
-; SLOW_64-NEXT:    movq $0, 40
-; SLOW_64-NEXT:    movq $0, 32
-; SLOW_64-NEXT:    movq $0, 24
-; SLOW_64-NEXT:    movq $0, 16
-; SLOW_64-NEXT:    movq $0, 8
-; SLOW_64-NEXT:    movq $0, 0
+; SLOW_64-NEXT:    movq $0, 72(%rdi)
+; SLOW_64-NEXT:    movq $0, 64(%rdi)
+; SLOW_64-NEXT:    movq $0, 56(%rdi)
+; SLOW_64-NEXT:    movq $0, 48(%rdi)
+; SLOW_64-NEXT:    movq $0, 40(%rdi)
+; SLOW_64-NEXT:    movq $0, 32(%rdi)
+; SLOW_64-NEXT:    movq $0, 24(%rdi)
+; SLOW_64-NEXT:    movq $0, 16(%rdi)
+; SLOW_64-NEXT:    movq $0, 8(%rdi)
+; SLOW_64-NEXT:    movq $0, (%rdi)
 ; SLOW_64-NEXT:    retq
-  call void @llvm.memset.p0.i64(ptr align 4 null, i8 0, i64 80, i1 false)
+  call void @llvm.memset.p0.i64(ptr align 4 %dst, i8 0, i64 80, i1 false)
   ret void
 }
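The memset-2.ll and memset64-on-x86-32.ll updates change the tests' inputs, not just their CHECK lines: now that the alignment improvement runs unconditionally, getKnownAlignment() can prove a very large alignment for a literal `null` or `undef` destination, so those calls would no longer exercise the modest alignment the tests were written for. Passing a real `ptr` parameter pins the known alignment down. A hypothetical contrast (function names invented here; the alignment claim for `null` assumes the default address space):

    define void @null_dest_sketch() nounwind {
      ; After this patch, the null destination gets a (maximal) provable
      ; alignment, so this call no longer tests 'align 4' lowering.
      call void @llvm.memset.p0.i64(ptr align 4 null, i8 0, i64 80, i1 false)
      ret void
    }
    define void @param_dest_sketch(ptr align 4 %dst) nounwind {
      ; A plain parameter keeps the provable alignment at 4, preserving
      ; the original intent of @bork.
      call void @llvm.memset.p0.i64(ptr align 4 %dst, i8 0, i64 80, i1 false)
      ret void
    }
    declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)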