diff --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl index 65f6f2e7d8c24..859e81f08d6bd 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl @@ -515,13 +515,17 @@ typedef struct { private char *p; } StructTy3; -// CHECK-LABEL: test_memset_private -// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* noundef align 8 {{.*}}, i8 0, i64 32, i1 false) -// CHECK: [[GEP:%.*]] = getelementptr inbounds %struct.StructTy3, %struct.StructTy3 addrspace(5)* %ptr, i32 0, i32 4 -// CHECK: store i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[GEP]] -// CHECK: [[GEP1:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* {{.*}}, i32 36 -// CHECK: [[GEP1_CAST:%.*]] = bitcast i8 addrspace(5)* [[GEP1]] to i32 addrspace(5)* -// CHECK: store i32 0, i32 addrspace(5)* [[GEP1_CAST]], align 4 +// CHECK-LABEL: @test_memset_private( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[STRUCT_STRUCTTY3:%.*]] addrspace(5)* [[PTR:%.*]] to i8 addrspace(5)* +// CHECK-NEXT: [[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST:%.*]] = bitcast [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]] to <32 x i8> addrspace(5)* +// CHECK-NEXT: store <32 x i8> zeroinitializer, <32 x i8> addrspace(5)* [[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST]], align 8, !tbaa.struct !9 +// CHECK-NEXT: [[S3_SROA_4_0__SROA_IDX6:%.*]] = getelementptr inbounds [[STRUCT_STRUCTTY3]], [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]], i32 0, i32 4 +// CHECK-NEXT: store i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[S3_SROA_4_0__SROA_IDX6]], align 8, !tbaa.struct !12 +// CHECK-NEXT: [[S3_SROA_5_0__SROA_IDX:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP0]], i32 36 +// CHECK-NEXT: [[S3_SROA_5_0__SROA_CAST8:%.*]] = bitcast i8 addrspace(5)* [[S3_SROA_5_0__SROA_IDX]] to i32 addrspace(5)* +// CHECK-NEXT: store i32 0, i32 addrspace(5)* [[S3_SROA_5_0__SROA_CAST8]], align 4, !tbaa.struct !13 +// CHECK-NEXT: ret void void test_memset_private(private StructTy3 *ptr) { StructTy3 S3 = {0, 0, 0, 0, 0}; *ptr = S3; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 0d045656661a1..0fafa3af76367 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1806,8 +1806,10 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, ? Ty->getElementType() : FixedVectorType::get(Ty->getElementType(), NumElements); - Type *SplitIntTy = - Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8); + Type *SplitIntTy = nullptr; + if (uint64_t Bitwidth = NumElements * ElementSize * 8; + Bitwidth <= IntegerType::MAX_INT_BITS) + SplitIntTy = Type::getIntNTy(Ty->getContext(), Bitwidth); Use *U = S.getUse(); @@ -1826,7 +1828,8 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, // Disable vector promotion when there are loads or stores of an FCA. if (LTy->isStructTy()) return false; - if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) { + if (SplitIntTy && + (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) { assert(LTy->isIntegerTy()); LTy = SplitIntTy; } @@ -1839,7 +1842,8 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, // Disable vector promotion when there are loads or stores of an FCA. if (STy->isStructTy()) return false; - if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) { + if (SplitIntTy && + (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) { assert(STy->isIntegerTy()); STy = SplitIntTy; } @@ -1934,6 +1938,9 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { CheckCandidateType(LI->getType()); else if (auto *SI = dyn_cast(S.getUse()->getUser())) CheckCandidateType(SI->getValueOperand()->getType()); + else if (auto *MTI = dyn_cast(S.getUse()->getUser())) + CheckCandidateType(FixedVectorType::get( + IntegerType::getInt8Ty(MTI->getContext()), P.size())); } // If we didn't find a vector type, nothing to do here. diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll index 1326ba437f94f..6dbb9443fd5cf 100644 --- a/llvm/test/CodeGen/AMDGPU/v1024.ll +++ b/llvm/test/CodeGen/AMDGPU/v1024.ll @@ -4,7 +4,7 @@ ; GCN-LABEL: {{^}}test_v1024: ; GCN-NOT: v_accvgpr -; GCN-COUNT-32: v_mov_b32_e32 +; GCN-COUNT-10: v_mov_b32_e32 ; GCN-NOT: v_accvgpr define amdgpu_kernel void @test_v1024() { entry: diff --git a/llvm/test/DebugInfo/X86/sroasplit-1.ll b/llvm/test/DebugInfo/X86/sroasplit-1.ll index 0ec368130da28..5a80b56950122 100644 --- a/llvm/test/DebugInfo/X86/sroasplit-1.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-1.ll @@ -20,10 +20,8 @@ ; ; Verify that SROA creates a variable piece when splitting i1. -; CHECK: %[[I1:.*]] = alloca [12 x i8], align 4 -; CHECK: call void @llvm.dbg.declare(metadata [12 x i8]* %[[I1]], metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 96)) -; CHECK: call void @llvm.dbg.value(metadata i32 %[[A:.*]], metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) -; CHECK: ret i32 %[[A]] +; CHECK: %[[I1:.*]] = load <12 x i8>, +; CHECK: call void @llvm.dbg.value(metadata <12 x i8> %[[I1]], metadata ![[VAR:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 96)) ; Read Var and Piece: ; CHECK: ![[VAR]] = !DILocalVariable(name: "i1",{{.*}} line: 11, diff --git a/llvm/test/DebugInfo/X86/sroasplit-4.ll b/llvm/test/DebugInfo/X86/sroasplit-4.ll index 0d5594ef867dd..a3b35b820a96c 100644 --- a/llvm/test/DebugInfo/X86/sroasplit-4.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-4.ll @@ -1,28 +1,28 @@ ; RUN: opt -sroa < %s -S -o - | FileCheck %s ; ; Test that recursively splitting an alloca updates the debug info correctly. -; CHECK: %[[T:.*]] = load i64, i64* @t, align 8 -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[Y:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) -; CHECK: %[[T1:.*]] = load i64, i64* @t, align 8 -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[Y]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[R:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 192, 64)) -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 256, 64)) -; +; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC:.*]], metadata ![[Y:.*]], metadata !DIExpression()) +; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC1:.*]], metadata ![[Y]], metadata !DIExpression()) +; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[R:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) +; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) +; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64)) +; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC1]], metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 192, 128)) +; ; struct p { ; __SIZE_TYPE__ s; ; __SIZE_TYPE__ t; ; }; -; +; ; struct r { ; int i; ; struct p x; ; struct p y; ; }; -; +; ; extern int call_me(struct r); ; extern int maybe(); ; extern __SIZE_TYPE__ t; -; +; ; int test() { ; if (maybe()) ; return 0; diff --git a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll index 886bde2686bd9..3361ad11c244b 100644 --- a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll @@ -68,12 +68,13 @@ define dso_local i32* @_Z3foo1S(%0* byval(%0) align 8 %arg) { ; CHECK-LABEL: @_Z3foo1S( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I2:%.*]] = alloca [[TMP0:%.*]], align 8 -; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I5_SROA_IDX]], align 8 +; CHECK-NEXT: [[TMP0]] = bitcast %0* [[ARG:%.*]] to i64* +; CHECK-NEXT: [[I11_SROA_0_0_VEC_EXTRACT_EXTRACT:%.*]] = load i64, i64* [[TMP0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[I11_SROA_0_0_VEC_EXTRACT_EXTRACT]] to i32* ; CHECK-NEXT: [[I_SROA_0_0_I6_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I2]], i64 0, i32 0 -; CHECK-NEXT: store i32* [[I1_SROA_0_0_COPYLOAD]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8 +; CHECK-NEXT: store i32* [[TMP1]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8 ; CHECK-NEXT: tail call void @_Z7escape01S(%0* nonnull byval([[TMP0]]) align 8 [[I2]]) -; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]] +; CHECK-NEXT: ret i32* [[TMP1]] ; bb: %i = alloca %0, align 8 @@ -107,21 +108,22 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) define dso_local i32* @_Z3bar1S(%0* byval(%0) align 8 %arg) { ; CHECK-LABEL: @_Z3bar1S( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I1_SROA_0_0_I4_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[ARG:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I4_SROA_IDX]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %0* [[ARG:%.*]] to i64* +; CHECK-NEXT: [[I13_SROA_0_0_VEC_EXTRACT_EXTRACT:%.*]] = load i64, i64* [[TMP0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[I13_SROA_0_0_VEC_EXTRACT_EXTRACT]] to i32* ; CHECK-NEXT: [[I5:%.*]] = tail call i32 @_Z4condv() ; CHECK-NEXT: [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0 ; CHECK-NEXT: br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]] ; CHECK: bb7: ; CHECK-NEXT: tail call void @_Z5sync0v() -; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[I1_SROA_0_0_COPYLOAD]]) +; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[TMP1]]) ; CHECK-NEXT: br label [[BB13:%.*]] ; CHECK: bb10: ; CHECK-NEXT: tail call void @_Z5sync1v() -; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[I1_SROA_0_0_COPYLOAD]]) +; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[TMP1]]) ; CHECK-NEXT: br label [[BB13]] ; CHECK: bb13: -; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]] +; CHECK-NEXT: ret i32* [[TMP1]] ; bb: %i = alloca %0, align 8 diff --git a/llvm/test/Transforms/SROA/address-spaces.ll b/llvm/test/Transforms/SROA/address-spaces.ll index 70e1a682d7bfc..0300e99f9a217 100644 --- a/llvm/test/Transforms/SROA/address-spaces.ll +++ b/llvm/test/Transforms/SROA/address-spaces.ll @@ -11,8 +11,8 @@ declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) nocapture, ptr addrspace(1) ; Make sure an illegal bitcast isn't introduced define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { ; CHECK-LABEL: @test_address_space_1_1( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16 @@ -23,8 +23,8 @@ define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { define void @test_address_space_1_0(ptr addrspace(1) %a, ptr %b) { ; CHECK-LABEL: @test_address_space_1_0( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16 @@ -35,8 +35,8 @@ define void @test_address_space_1_0(ptr addrspace(1) %a, ptr %b) { define void @test_address_space_0_1(ptr %a, ptr addrspace(1) %b) { ; CHECK-LABEL: @test_address_space_0_1( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16 diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll index 66da09cd52d27..ba673b1d16465 100644 --- a/llvm/test/Transforms/SROA/alignment.ll +++ b/llvm/test/Transforms/SROA/alignment.ll @@ -92,15 +92,15 @@ define void @PR13920(ptr %a, ptr %b) { ; Test that alignments on memcpy intrinsics get propagated to loads and stores. ; CHECK-LABEL: @PR13920( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; ; DEBUGLOC-LABEL: @PR13920( ; DEBUGLOC-NEXT: entry: ; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG38:![0-9]+]] -; DEBUGLOC-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2, !dbg [[DBG39:![0-9]+]] -; DEBUGLOC-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2, !dbg [[DBG40:![0-9]+]] +; DEBUGLOC-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2, !dbg [[DBG39:![0-9]+]] +; DEBUGLOC-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2, !dbg [[DBG40:![0-9]+]] ; DEBUGLOC-NEXT: ret void, !dbg [[DBG41:![0-9]+]] ; @@ -118,21 +118,17 @@ define void @test3(ptr %x) { ; reduce the alignment. ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8 -; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A_SROA_0]], ptr align 8 [[X:%.*]], i32 22, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[B_SROA_0]], ptr align 2 [[X]], i32 18, i1 false) +; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load <22 x i8>, ptr [[X:%.*]], align 8 +; CHECK-NEXT: [[B_SROA_0_6_COPYLOAD:%.*]] = load <18 x i8>, ptr [[X]], align 2 ; CHECK-NEXT: ret void ; ; DEBUGLOC-LABEL: @test3( ; DEBUGLOC-NEXT: entry: -; DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8, !dbg [[DBG47:![0-9]+]] -; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]] -; DEBUGLOC-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2, !dbg [[DBG48:![0-9]+]] -; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -; DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A_SROA_0]], ptr align 8 [[X:%.*]], i32 22, i1 false), !dbg [[DBG49:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load <22 x i8>, ptr [[X:%.*]], align 8, !dbg [[DBG49:![0-9]+]] ; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50:![0-9]+]] -; DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[B_SROA_0]], ptr align 2 [[X]], i32 18, i1 false), !dbg [[DBG51:![0-9]+]] +; DEBUGLOC-NEXT: [[B_SROA_0_6_COPYLOAD:%.*]] = load <18 x i8>, ptr [[X]], align 2, !dbg [[DBG51:![0-9]+]] ; DEBUGLOC-NEXT: ret void, !dbg [[DBG52:![0-9]+]] ; diff --git a/llvm/test/Transforms/SROA/alloca-address-space.ll b/llvm/test/Transforms/SROA/alloca-address-space.ll index d4f305c39c8fe..b06f269d806a5 100644 --- a/llvm/test/Transforms/SROA/alloca-address-space.ll +++ b/llvm/test/Transforms/SROA/alloca-address-space.ll @@ -10,8 +10,8 @@ declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) nocapture, ptr addrspace(1) define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { ; CHECK-LABEL: @test_address_space_1_1( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16, addrspace(2) @@ -22,8 +22,8 @@ define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { define void @test_address_space_1_0(ptr addrspace(1) %a, ptr addrspace(2) %b) { ; CHECK-LABEL: @test_address_space_1_0( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(2) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(2) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16, addrspace(2) @@ -34,8 +34,8 @@ define void @test_address_space_1_0(ptr addrspace(1) %a, ptr addrspace(2) %b) { define void @test_address_space_0_1(ptr addrspace(2) %a, ptr addrspace(1) %b) { ; CHECK-LABEL: @test_address_space_0_1( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(2) [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(2) [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16, addrspace(2) diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll index 5ac8ed8c6e6a3..18874617e0b26 100644 --- a/llvm/test/Transforms/SROA/basictest.ll +++ b/llvm/test/Transforms/SROA/basictest.ll @@ -139,100 +139,83 @@ L2: define void @test3(ptr %dst, ptr align 8 %src) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [42 x i8], align 1 -; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [99 x i8], align 1 -; CHECK-NEXT: [[A_SROA_32:%.*]] = alloca [16 x i8], align 1 -; CHECK-NEXT: [[A_SROA_15:%.*]] = alloca [42 x i8], align 1 -; CHECK-NEXT: [[A_SROA_16:%.*]] = alloca [7 x i8], align 1 -; CHECK-NEXT: [[A_SROA_235:%.*]] = alloca [7 x i8], align 1 -; CHECK-NEXT: [[A_SROA_31:%.*]] = alloca [85 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_0]], ptr align 8 [[SRC:%.*]], i32 42, i1 false), !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[A_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <42 x i8>, ptr [[SRC:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]] ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 42 ; CHECK-NEXT: [[A_SROA_2_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 2, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 43 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3]], ptr align 1 [[A_SROA_3_0_SRC_SROA_IDX]], i32 99, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_3_SROA_0_0_COPYLOAD:%.*]] = load <99 x i8>, ptr [[A_SROA_3_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_32_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 142 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_32]], ptr align 2 [[A_SROA_32_0_SRC_SROA_IDX]], i32 16, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A_SROA_32_0_SRC_SROA_IDX]], align 2, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_15_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 158 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15]], ptr align 2 [[A_SROA_15_0_SRC_SROA_IDX]], i32 42, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_15_SROA_0_0_COPYLOAD:%.*]] = load <42 x i8>, ptr [[A_SROA_15_0_SRC_SROA_IDX]], align 2, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_16_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 200 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16]], ptr align 8 [[A_SROA_16_0_SRC_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr [[A_SROA_16_0_SRC_SROA_IDX]], align 8, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_23_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 207 ; CHECK-NEXT: [[A_SROA_23_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_23_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_235_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 208 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235]], ptr align 8 [[A_SROA_235_0_SRC_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr [[A_SROA_235_0_SRC_SROA_IDX]], align 8, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_31_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 215 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], ptr align 1 [[A_SROA_31_0_SRC_SROA_IDX]], i32 85, i1 false), !tbaa [[TBAA0]] -; CHECK-NEXT: store i8 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA3:![0-9]+]] -; CHECK-NEXT: store i16 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA5:![0-9]+]] -; CHECK-NEXT: store i32 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA7:![0-9]+]] -; CHECK-NEXT: store i64 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA9:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_1_OVERLAP_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 1 -; CHECK-NEXT: store i64 2, ptr [[A_SROA_32_1_OVERLAP_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA11:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_2_OVERLAP_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 2 -; CHECK-NEXT: store i64 3, ptr [[A_SROA_32_2_OVERLAP_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA13:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_3_OVERLAP_4_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 3 -; CHECK-NEXT: store i64 4, ptr [[A_SROA_32_3_OVERLAP_4_I8_SROA_IDX]], align 1, !tbaa [[TBAA15:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_4_OVERLAP_5_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 4 -; CHECK-NEXT: store i64 5, ptr [[A_SROA_32_4_OVERLAP_5_I8_SROA_IDX]], align 1, !tbaa [[TBAA17:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_5_OVERLAP_6_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 5 -; CHECK-NEXT: store i64 6, ptr [[A_SROA_32_5_OVERLAP_6_I8_SROA_IDX]], align 1, !tbaa [[TBAA19:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_6_OVERLAP_7_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 6 -; CHECK-NEXT: store i64 7, ptr [[A_SROA_32_6_OVERLAP_7_I8_SROA_IDX]], align 1, !tbaa [[TBAA21:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_7_OVERLAP_8_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 7 -; CHECK-NEXT: store i64 8, ptr [[A_SROA_32_7_OVERLAP_8_I8_SROA_IDX]], align 1, !tbaa [[TBAA23:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_8_OVERLAP_9_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 8 -; CHECK-NEXT: store i64 9, ptr [[A_SROA_32_8_OVERLAP_9_I8_SROA_IDX]], align 1, !tbaa [[TBAA25:![0-9]+]] -; CHECK-NEXT: store i8 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA27:![0-9]+]] -; CHECK-NEXT: store i16 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA29:![0-9]+]] -; CHECK-NEXT: store i32 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA31:![0-9]+]] -; CHECK-NEXT: [[A_SROA_16_1_OVERLAP2_1_1_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 1 -; CHECK-NEXT: store i32 2, ptr [[A_SROA_16_1_OVERLAP2_1_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA33:![0-9]+]] -; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 2 -; CHECK-NEXT: store i32 3, ptr [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA35:![0-9]+]] -; CHECK-NEXT: [[A_SROA_16_3_OVERLAP2_1_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 3 -; CHECK-NEXT: store i32 4, ptr [[A_SROA_16_3_OVERLAP2_1_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA37:![0-9]+]] -; CHECK-NEXT: store i32 1, ptr [[A_SROA_235]], align 1, !tbaa [[TBAA39:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX11:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 -; CHECK-NEXT: store i8 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX11]], align 1, !tbaa [[TBAA41:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX10:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 -; CHECK-NEXT: store i16 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX10]], align 1, !tbaa [[TBAA43:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 -; CHECK-NEXT: store i32 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA45:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_2_OVERLAP2_2_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 2 -; CHECK-NEXT: store i32 3, ptr [[A_SROA_235_2_OVERLAP2_2_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA47:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_3_OVERLAP2_2_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 3 -; CHECK-NEXT: store i32 4, ptr [[A_SROA_235_3_OVERLAP2_2_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA49:![0-9]+]] -; CHECK-NEXT: [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_15]], i64 39 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX]], ptr align 1 [[SRC]], i32 3, i1 false), !tbaa [[TBAA51:![0-9]+]] +; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD:%.*]] = load <85 x i8>, ptr [[A_SROA_31_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VEC_INSERT:%.*]] = insertelement <16 x i8> [[A_SROA_32_SROA_0_0_COPYLOAD]], i8 1, i32 0 +; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND30:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i16> to <2 x i8>), i32 0), i8 extractelement (<2 x i8> bitcast (<1 x i16> to <2 x i8>), i32 1), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VEC_INSERT]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND28:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VECBLEND30]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VECBLEND28]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_1_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_2_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_1_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_3_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_2_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_4_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_3_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_5_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_4_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_6_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_5_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_7_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_6_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_8_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7)>, <16 x i8> [[A_SROA_32_SROA_0_7_VECBLEND]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VEC_INSERT:%.*]] = insertelement <7 x i8> [[A_SROA_16_SROA_0_0_COPYLOAD]], i8 1, i32 0 +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND20:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i16> to <2 x i8>), i32 0), i8 extractelement (<2 x i8> bitcast (<1 x i16> to <2 x i8>), i32 1), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_0_VEC_INSERT]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_0_VECBLEND20]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_1_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef, i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_0_VECBLEND]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_2_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_1_VECBLEND]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_3_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3)>, <7 x i8> [[A_SROA_16_SROA_0_2_VECBLEND]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_0_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_0_COPYLOAD]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VEC_INSERT:%.*]] = insertelement <7 x i8> [[A_SROA_235_SROA_0_0_VECBLEND]], i8 1, i32 1 +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND15:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i16> to <2 x i8>), i32 0), i8 extractelement (<2 x i8> bitcast (<1 x i16> to <2 x i8>), i32 1), i8 undef, i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_1_VEC_INSERT]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_1_VECBLEND15]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_1_VECBLEND]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_3_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3)>, <7 x i8> [[A_SROA_235_SROA_0_2_VECBLEND]] +; CHECK-NEXT: [[A_SROA_15_SROA_0_39_COPYLOAD:%.*]] = load <3 x i8>, ptr [[SRC]], align 1, !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: [[A_SROA_15_SROA_0_39_VEC_EXPAND:%.*]] = shufflevector <3 x i8> [[A_SROA_15_SROA_0_39_COPYLOAD]], <3 x i8> poison, <42 x i32> +; CHECK-NEXT: [[A_SROA_15_SROA_0_39_VECBLEND:%.*]] = select <42 x i1> , <42 x i8> [[A_SROA_15_SROA_0_39_VEC_EXPAND]], <42 x i8> [[A_SROA_15_SROA_0_0_COPYLOAD]] ; CHECK-NEXT: [[A_SROA_16_197_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16]], ptr align 1 [[A_SROA_16_197_SRC_SROA_IDX]], i32 5, i1 false), !tbaa [[TBAA51]] -; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX12:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 2 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX12]], i8 42, i32 5, i1 false), !tbaa [[TBAA53:![0-9]+]] -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_235]], i8 42, i32 2, i1 false), !tbaa [[TBAA53]] -; CHECK-NEXT: [[A_SROA_235_209_OVERLAP2_2_1_I8_SROA_IDX8:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235_209_OVERLAP2_2_1_I8_SROA_IDX8]], ptr align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA55:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_210_OVERLAP2_2_2_I8_SROA_IDX9:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 2 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235_210_OVERLAP2_2_2_I8_SROA_IDX9]], ptr align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA57:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_COPYLOAD23:%.*]] = load <5 x i8>, ptr [[A_SROA_16_197_SRC_SROA_IDX]], align 1, !tbaa [[TBAA3]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <5 x i8> [[A_SROA_16_SROA_0_0_COPYLOAD23]], <5 x i8> poison, <7 x i32> +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND24:%.*]] = select <7 x i1> , <7 x i8> [[A_SROA_16_SROA_0_0_VEC_EXPAND]], <7 x i8> [[A_SROA_16_SROA_0_3_VECBLEND]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_2_VECBLEND25:%.*]] = select <7 x i1> , <7 x i8> , <7 x i8> [[A_SROA_16_SROA_0_0_VECBLEND24]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_0_VECBLEND13:%.*]] = select <7 x i1> , <7 x i8> , <7 x i8> [[A_SROA_235_SROA_0_3_VECBLEND]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_COPYLOAD:%.*]] = load <5 x i8>, ptr [[SRC]], align 1, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VEC_EXPAND:%.*]] = shufflevector <5 x i8> [[A_SROA_235_SROA_0_1_COPYLOAD]], <5 x i8> poison, <7 x i32> +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND17:%.*]] = select <7 x i1> , <7 x i8> [[A_SROA_235_SROA_0_1_VEC_EXPAND]], <7 x i8> [[A_SROA_235_SROA_0_0_VECBLEND13]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_2_COPYLOAD:%.*]] = load <5 x i8>, ptr [[SRC]], align 1, !tbaa [[TBAA7:![0-9]+]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VEC_EXPAND:%.*]] = shufflevector <5 x i8> [[A_SROA_235_SROA_0_2_COPYLOAD]], <5 x i8> poison, <7 x i32> +; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VECBLEND18:%.*]] = select <7 x i1> , <7 x i8> [[A_SROA_235_SROA_0_2_VEC_EXPAND]], <7 x i8> [[A_SROA_235_SROA_0_1_VECBLEND17]] ; CHECK-NEXT: [[A_SROA_31_210_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 5 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], ptr align 1 [[A_SROA_31_210_SRC_SROA_IDX]], i32 3, i1 false), !tbaa [[TBAA57]] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST:%.*]], ptr align 1 [[A_SROA_0]], i32 42, i1 false), !tbaa [[TBAA59:![0-9]+]] +; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD11:%.*]] = load <3 x i8>, ptr [[A_SROA_31_210_SRC_SROA_IDX]], align 1, !tbaa [[TBAA7]] +; CHECK-NEXT: [[A_SROA_31_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x i8> [[A_SROA_31_SROA_0_0_COPYLOAD11]], <3 x i8> poison, <85 x i32> +; CHECK-NEXT: [[A_SROA_31_SROA_0_0_VECBLEND:%.*]] = select <85 x i1> , <85 x i8> [[A_SROA_31_SROA_0_0_VEC_EXPAND]], <85 x i8> [[A_SROA_31_SROA_0_0_COPYLOAD]] +; CHECK-NEXT: store <42 x i8> [[A_SROA_0_SROA_0_0_COPYLOAD]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA9:![0-9]+]] ; CHECK-NEXT: [[A_SROA_2_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 42 -; CHECK-NEXT: store i8 0, ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA59]] +; CHECK-NEXT: store i8 0, ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 43 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_3]], i32 99, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: store <99 x i8> [[A_SROA_3_SROA_0_0_COPYLOAD]], ptr [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_32_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 142 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_32_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_32]], i32 16, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: store <16 x i8> [[A_SROA_32_SROA_0_8_VECBLEND]], ptr [[A_SROA_32_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_15_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 158 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_15]], i32 42, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: store <42 x i8> [[A_SROA_15_SROA_0_39_VECBLEND]], ptr [[A_SROA_15_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_16_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 200 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_16]], i32 7, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: store <7 x i8> [[A_SROA_16_SROA_0_2_VECBLEND25]], ptr [[A_SROA_16_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_23_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 207 -; CHECK-NEXT: store i8 42, ptr [[A_SROA_23_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA59]] +; CHECK-NEXT: store i8 42, ptr [[A_SROA_23_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_235_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 208 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_235]], i32 7, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: store <7 x i8> [[A_SROA_235_SROA_0_2_VECBLEND18]], ptr [[A_SROA_235_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_31_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 215 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_31]], i32 85, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: store <85 x i8> [[A_SROA_31_SROA_0_0_VECBLEND]], ptr [[A_SROA_31_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: ret void ; @@ -315,60 +298,29 @@ entry: define void @test4(ptr %dst, ptr %src) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [20 x i8], align 1 -; CHECK-NEXT: [[A_SROA_2_SROA_4:%.*]] = alloca [7 x i8], align 1 -; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [10 x i8], align 1 -; CHECK-NEXT: [[A_SROA_31_SROA_5:%.*]] = alloca [7 x i8], align 1 -; CHECK-NEXT: [[A_SROA_6_SROA_4:%.*]] = alloca [7 x i8], align 1 -; CHECK-NEXT: [[A_SROA_7:%.*]] = alloca [40 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_0]], ptr align 1 [[SRC:%.*]], i32 20, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <20 x i8>, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 20 -; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_SROA_2_SROA_3_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_SRC_SROA_IDX]], i64 2 -; CHECK-NEXT: [[A_SROA_2_SROA_3_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_2_SROA_3_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_SROA_2_SROA_4_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_SRC_SROA_IDX]], i64 3 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_2_SROA_4]], ptr align 1 [[A_SROA_2_SROA_4_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 30 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3]], ptr align 1 [[A_SROA_3_0_SRC_SROA_IDX]], i32 10, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_3_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_3_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_31_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 40 -; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[A_SROA_31_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_SROA_31_SROA_4_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_SRC_SROA_IDX]], i64 2 -; CHECK-NEXT: [[A_SROA_31_SROA_4_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_31_SROA_4_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_SROA_31_SROA_5_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_SRC_SROA_IDX]], i64 3 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5]], ptr align 1 [[A_SROA_31_SROA_5_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_31_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_6_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 50 -; CHECK-NEXT: [[A_SROA_6_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[A_SROA_6_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_SROA_6_SROA_3_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_SRC_SROA_IDX]], i64 2 -; CHECK-NEXT: [[A_SROA_6_SROA_3_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_6_SROA_3_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_SROA_6_SROA_4_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_SRC_SROA_IDX]], i64 3 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_6_SROA_4]], ptr align 1 [[A_SROA_6_SROA_4_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_6_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_6_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_7_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 60 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_7]], ptr align 1 [[A_SROA_7_0_SRC_SROA_IDX]], i32 40, i1 false), !tbaa [[TBAA0]] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5]], ptr align 1 [[A_SROA_2_SROA_4]], i32 7, i1 false), !tbaa [[TBAA3]] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5]], ptr align 1 [[A_SROA_6_SROA_4]], i32 7, i1 false), !tbaa [[TBAA5]] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST:%.*]], ptr align 1 [[A_SROA_0]], i32 20, i1 false), !tbaa [[TBAA7]] +; CHECK-NEXT: [[A_SROA_7_SROA_0_0_COPYLOAD:%.*]] = load <40 x i8>, ptr [[A_SROA_7_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_31_SROA_0_2_VEC_INSERT:%.*]] = insertelement <10 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], i8 0, i32 2 +; CHECK-NEXT: store <20 x i8> [[A_SROA_0_SROA_0_0_COPYLOAD]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA11:![0-9]+]] ; CHECK-NEXT: [[A_SROA_2_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 20 -; CHECK-NEXT: store i16 [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]] -; CHECK-NEXT: [[A_SROA_2_SROA_3_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_DST_SROA_IDX]], i64 2 -; CHECK-NEXT: store i8 [[A_SROA_2_SROA_3_0_COPYLOAD]], ptr [[A_SROA_2_SROA_3_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA7]] -; CHECK-NEXT: [[A_SROA_2_SROA_4_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_DST_SROA_IDX]], i64 3 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_2_SROA_4_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 [[A_SROA_2_SROA_4]], i32 7, i1 false), !tbaa [[TBAA7]] +; CHECK-NEXT: store <10 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]] ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 30 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_3]], i32 10, i1 false), !tbaa [[TBAA7]] +; CHECK-NEXT: store <10 x i8> [[A_SROA_3_SROA_0_0_COPYLOAD]], ptr [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]] ; CHECK-NEXT: [[A_SROA_31_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 40 -; CHECK-NEXT: store i16 [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_31_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]] -; CHECK-NEXT: [[A_SROA_31_SROA_4_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_DST_SROA_IDX]], i64 2 -; CHECK-NEXT: store i8 [[A_SROA_6_SROA_3_0_COPYLOAD]], ptr [[A_SROA_31_SROA_4_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA7]] -; CHECK-NEXT: [[A_SROA_31_SROA_5_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_DST_SROA_IDX]], i64 3 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 [[A_SROA_31_SROA_5]], i32 7, i1 false), !tbaa [[TBAA7]] +; CHECK-NEXT: store <10 x i8> [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_31_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]] ; CHECK-NEXT: [[A_SROA_6_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 50 -; CHECK-NEXT: store i16 [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_6_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]] -; CHECK-NEXT: [[A_SROA_6_SROA_3_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_DST_SROA_IDX]], i64 2 -; CHECK-NEXT: store i8 [[A_SROA_6_SROA_3_0_COPYLOAD]], ptr [[A_SROA_6_SROA_3_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA7]] -; CHECK-NEXT: [[A_SROA_6_SROA_4_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_DST_SROA_IDX]], i64 3 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_6_SROA_4_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 [[A_SROA_6_SROA_4]], i32 7, i1 false), !tbaa [[TBAA7]] +; CHECK-NEXT: store <10 x i8> [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_6_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]] ; CHECK-NEXT: [[A_SROA_7_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 60 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_7_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_7]], i32 40, i1 false), !tbaa [[TBAA7]] +; CHECK-NEXT: store <40 x i8> [[A_SROA_7_SROA_0_0_COPYLOAD]], ptr [[A_SROA_7_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]] ; CHECK-NEXT: ret void ; @@ -455,8 +407,8 @@ define void @test7(ptr %src, ptr %dst) { ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_COPYLOAD]], ptr [[A_SROA_0]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA3]] -; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA3]] +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA13:![0-9]+]] +; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA13]] ; CHECK-NEXT: ret void ; @@ -476,9 +428,9 @@ define %S2 @test8(ptr %arg) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[S2_NEXT_PTR:%.*]] = getelementptr [[S2:%.*]], ptr [[ARG:%.*]], i64 0, i32 1 ; CHECK-NEXT: [[S2_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_PTR]], align 8, !tbaa [[TBAA0]] -; CHECK-NEXT: [[S2_NEXT_S1:%.*]] = load ptr, ptr [[S2_NEXT]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: [[S2_NEXT_S1:%.*]] = load ptr, ptr [[S2_NEXT]], align 8, !tbaa [[TBAA13]] ; CHECK-NEXT: [[S2_NEXT_NEXT_PTR:%.*]] = getelementptr [[S2]], ptr [[S2_NEXT]], i64 0, i32 1 -; CHECK-NEXT: [[S2_NEXT_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_NEXT_PTR]], align 8, !tbaa [[TBAA7]] +; CHECK-NEXT: [[S2_NEXT_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_NEXT_PTR]], align 8, !tbaa [[TBAA11]] ; CHECK-NEXT: [[RESULT1:%.*]] = insertvalue [[S2]] poison, ptr [[S2_NEXT_S1]], 0 ; CHECK-NEXT: [[RESULT2:%.*]] = insertvalue [[S2]] [[RESULT1]], ptr [[S2_NEXT_NEXT]], 1 ; CHECK-NEXT: ret [[S2]] [[RESULT2]] @@ -725,7 +677,7 @@ define void @test16(ptr %src, ptr %dst) { ; CHECK-LABEL: @test16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i24, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: store i24 0, ptr [[DST:%.*]], align 1, !tbaa [[TBAA5]] +; CHECK-NEXT: store i24 0, ptr [[DST:%.*]], align 1, !tbaa [[TBAA15:![0-9]+]] ; CHECK-NEXT: ret void ; @@ -744,7 +696,7 @@ define void @test17(ptr %src, ptr %dst) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca [3 x i8], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[A]], ptr [[SRC:%.*]], i32 4, i1 true), !tbaa [[TBAA0]] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST:%.*]], ptr [[A]], i32 4, i1 true), !tbaa [[TBAA3]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST:%.*]], ptr [[A]], i32 4, i1 true), !tbaa [[TBAA13]] ; CHECK-NEXT: ret void ; @@ -765,12 +717,12 @@ define void @test18(ptr %src, ptr %dst, i32 %size) { ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i32, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 4 ; CHECK-NEXT: [[A_SROA_3_0_COPYLOAD:%.*]] = load i32, ptr [[A_SROA_3_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_33]], ptr [[SRC]], i32 [[SIZE:%.*]], i1 false), !tbaa [[TBAA3]] -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_33]], i8 42, i32 [[SIZE]], i1 false), !tbaa [[TBAA5]] -; CHECK-NEXT: store i32 42, ptr [[DST:%.*]], align 1, !tbaa [[TBAA9]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_33]], ptr [[SRC]], i32 [[SIZE:%.*]], i1 false), !tbaa [[TBAA13]] +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_33]], i8 42, i32 [[SIZE]], i1 false), !tbaa [[TBAA15]] +; CHECK-NEXT: store i32 42, ptr [[DST:%.*]], align 1, !tbaa [[TBAA17:![0-9]+]] ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4 -; CHECK-NEXT: store i32 [[A_SROA_3_0_COPYLOAD]], ptr [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST]], ptr align 1 [[A_SROA_33]], i32 [[SIZE]], i1 false), !tbaa [[TBAA11]] +; CHECK-NEXT: store i32 [[A_SROA_3_0_COPYLOAD]], ptr [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA17]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST]], ptr align 1 [[A_SROA_33]], i32 [[SIZE]], i1 false), !tbaa [[TBAA19:![0-9]+]] ; CHECK-NEXT: ret void ; @@ -1005,8 +957,7 @@ define void @PR14034(ptr %ptr, ptr %ptr2) { ; thing is to handle empty structs gracefully. ; CHECK-LABEL: @PR14034( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [12 x i8], align 8 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[PTR2:%.*]], ptr align 8 [[A_SROA_0]], i32 12, i1 false) +; CHECK-NEXT: store <12 x i8> undef, ptr [[PTR2:%.*]], align 1 ; CHECK-NEXT: ret void ; @@ -1227,8 +1178,6 @@ define void @PR14465() { ; Ensure that we don't crash when analyzing a alloca larger than the maximum ; integer type width (MAX_INT_BITS) supported by llvm (1048576*32 > (1<<23)-1). ; CHECK-LABEL: @PR14465( -; CHECK-NEXT: [[STACK:%.*]] = alloca [1048576 x i32], align 16 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[STACK]], i8 -2, i64 4194304, i1 false) ; CHECK-NEXT: ret void ; @@ -1547,8 +1496,8 @@ define void @test24(ptr %src, ptr %dst) { ; CHECK-NEXT: [[A:%.*]] = alloca i64, align 16 ; CHECK-NEXT: [[A_0_COPYLOAD:%.*]] = load volatile i64, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD]], ptr [[A]], align 16, !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_0_COPYLOAD1:%.*]] = load volatile i64, ptr [[A]], align 16, !tbaa [[TBAA3]] -; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA3]] +; CHECK-NEXT: [[A_0_COPYLOAD1:%.*]] = load volatile i64, ptr [[A]], align 16, !tbaa [[TBAA13]] +; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA13]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SROA/pointer-offset-size.ll b/llvm/test/Transforms/SROA/pointer-offset-size.ll index 76b52098a7e62..bf3c63c1ae7a3 100644 --- a/llvm/test/Transforms/SROA/pointer-offset-size.ll +++ b/llvm/test/Transforms/SROA/pointer-offset-size.ll @@ -8,8 +8,7 @@ target datalayout = "e-p:64:64:64:32" define i16 @test(ptr %ts2.i) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[S_SROA_0:%.*]] = alloca [3 x i8], align 8 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS2_I:%.*]], ptr align 8 [[S_SROA_0]], i32 3, i1 false) +; CHECK-NEXT: store <3 x i8> undef, ptr [[TS2_I:%.*]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[TS2_I]], align 2 ; CHECK-NEXT: ret i16 [[TMP0]] ; diff --git a/llvm/test/Transforms/SROA/scalable-vectors.ll b/llvm/test/Transforms/SROA/scalable-vectors.ll index d96f4dba868b3..bf9742e570084 100644 --- a/llvm/test/Transforms/SROA/scalable-vectors.ll +++ b/llvm/test/Transforms/SROA/scalable-vectors.ll @@ -67,7 +67,7 @@ define @cast_alloca_to_svint32_t( %type.coe define @cast_alloca_from_svint32_t() { ; CHECK-LABEL: @cast_alloca_from_svint32_t( ; CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 -; CHECK-NEXT: store <16 x i32> undef, ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT: store <64 x i8> undef, ptr [[RETVAL_COERCE]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 16 ; CHECK-NEXT: ret [[TMP1]] ; diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll index 7d2aeaaff57bc..2aa54a104153f 100644 --- a/llvm/test/Transforms/SROA/slice-width.ll +++ b/llvm/test/Transforms/SROA/slice-width.ll @@ -46,8 +46,7 @@ load_i1: define void @memcpy_fp80_padding() { ; CHECK-LABEL: @memcpy_fp80_padding( -; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca x86_fp80, align 16 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[X_SROA_0]], ptr align 16 @foo_copy_source, i32 16, i1 false) +; CHECK-NEXT: [[X_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr @foo_copy_source, align 16 ; CHECK-NEXT: [[X_SROA_1_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 16), align 16 ; CHECK-NEXT: [[X_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 24), align 8 ; CHECK-NEXT: store i64 [[X_SROA_1_0_COPYLOAD]], ptr @i64_sink, align 4 @@ -67,8 +66,6 @@ define void @memcpy_fp80_padding() { define void @memset_fp80_padding() { ; CHECK-LABEL: @memset_fp80_padding( -; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca x86_fp80, align 16 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[X_SROA_0]], i8 -1, i32 16, i1 false) ; CHECK-NEXT: store i64 -1, ptr @i64_sink, align 4 ; CHECK-NEXT: ret void ; @@ -136,8 +133,7 @@ define void @PR50888() { define void @PR50910() { ; CHECK-LABEL: @PR50910( -; CHECK-NEXT: [[T1:%.*]] = alloca i8, i64 1, align 8 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[T1]], i8 0, i64 1, i1 false) +; CHECK-NEXT: [[T1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i8> undef, i8 0, i32 0 ; CHECK-NEXT: ret void ; %t1 = alloca i8, i64 1, align 8 diff --git a/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll b/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll index 00cbe56929c02..c4e68ff0799ea 100644 --- a/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll +++ b/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll @@ -43,11 +43,15 @@ define amdgpu_kernel void @test_memset() #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <8 x i16> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 0 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 1 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 2 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: @@ -235,25 +239,23 @@ bb: define amdgpu_kernel void @test_half_array() #0 { ; CHECK-LABEL: @test_half_array( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0:%.*]] = alloca float, align 16 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4:%.*]] = alloca float, align 4 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_0]], i8 0, i32 4, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 [[B_BLOCKWISE_COPY_SROA_4]], i8 0, i32 4, i1 false) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float undef to i32 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float undef to i32 ; CHECK-NEXT: [[DATA:%.*]] = load [4 x float], ptr undef, align 4 ; CHECK-NEXT: [[DATA_FCA_0_EXTRACT:%.*]] = extractvalue [4 x float] [[DATA]], 0 -; CHECK-NEXT: store float [[DATA_FCA_0_EXTRACT]], ptr [[B_BLOCKWISE_COPY_SROA_0]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[DATA_FCA_0_EXTRACT]] to <4 x i8> ; CHECK-NEXT: [[DATA_FCA_1_EXTRACT:%.*]] = extractvalue [4 x float] [[DATA]], 1 -; CHECK-NEXT: store float [[DATA_FCA_1_EXTRACT]], ptr [[B_BLOCKWISE_COPY_SROA_4]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[DATA_FCA_1_EXTRACT]] to <4 x i8> ; CHECK-NEXT: [[DATA_FCA_2_EXTRACT:%.*]] = extractvalue [4 x float] [[DATA]], 2 ; CHECK-NEXT: [[DATA_FCA_3_EXTRACT:%.*]] = extractvalue [4 x float] [[DATA]], 3 ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_B_BLOCKWISE_COPY_SROA_0_0_LOAD1:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_0]], align 16 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_0]], i64 2 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_B_BLOCKWISE_COPY_SROA_0_2_LOAD2:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1]], align 2 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_0_B_BLOCKWISE_COPY_SROA_4_4_LOAD3:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_4]], align 4 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i8> [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i8> [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i8> [[B_BLOCKWISE_COPY_SROA_4_SROA_0_0_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: @@ -275,15 +277,17 @@ bb: define amdgpu_kernel void @test_array_vector() #0 { ; CHECK-LABEL: @test_array_vector( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5:%.*]] = alloca <8 x half>, align 16 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_5]], i8 0, i32 16, i1 false) ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <8 x i16> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 0 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 1 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 2 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: @@ -305,15 +309,17 @@ bb: define amdgpu_kernel void @test_array_vector2() #0 { ; CHECK-LABEL: @test_array_vector2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5:%.*]] = alloca <8 x half>, align 16 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_5]], i8 0, i32 16, i1 false) ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <8 x i16> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 0 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 1 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 2 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_SROA_0_4_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: @@ -335,38 +341,32 @@ bb: define amdgpu_kernel void @test_array_vector_no_vector_common_type() #0 { ; CHECK-LABEL: @test_array_vector_no_vector_common_type( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0:%.*]] = alloca float, align 16 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4:%.*]] = alloca float, align 4 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_7:%.*]] = alloca float, align 8 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_10:%.*]] = alloca float, align 4 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_13:%.*]] = alloca <8 x half>, align 16 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_0]], i8 0, i32 4, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 [[B_BLOCKWISE_COPY_SROA_4]], i8 0, i32 4, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 [[B_BLOCKWISE_COPY_SROA_7]], i8 0, i32 4, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 [[B_BLOCKWISE_COPY_SROA_10]], i8 0, i32 4, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_13]], i8 0, i32 16, i1 false) ; CHECK-NEXT: [[DATA1:%.*]] = load float, ptr undef, align 4 ; CHECK-NEXT: [[DATA2:%.*]] = load float, ptr undef, align 4 ; CHECK-NEXT: [[DATA3:%.*]] = load float, ptr undef, align 4 ; CHECK-NEXT: [[DATA4:%.*]] = load float, ptr undef, align 4 -; CHECK-NEXT: store float [[DATA1]], ptr [[B_BLOCKWISE_COPY_SROA_0]], align 16 -; CHECK-NEXT: store float [[DATA2]], ptr [[B_BLOCKWISE_COPY_SROA_4]], align 4 -; CHECK-NEXT: store float [[DATA3]], ptr [[B_BLOCKWISE_COPY_SROA_7]], align 8 -; CHECK-NEXT: store float [[DATA4]], ptr [[B_BLOCKWISE_COPY_SROA_10]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[DATA1]] to <4 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[DATA2]] to <4 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[DATA3]] to <4 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[DATA4]] to <4 x i8> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_B_BLOCKWISE_COPY_SROA_0_0_LOAD1:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_0]], align 16 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_0]], i64 2 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_B_BLOCKWISE_COPY_SROA_0_2_LOAD2:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1]], align 2 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_0_B_BLOCKWISE_COPY_SROA_4_4_LOAD3:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_4]], align 4 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_2_PTR4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_4]], i64 2 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_2_B_BLOCKWISE_COPY_SROA_4_6_LOAD4:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_4_2_PTR4_SROA_IDX]], align 2 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_7_0_B_BLOCKWISE_COPY_SROA_7_8_LOAD5:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_7]], align 8 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_7_2_PTR6_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_7]], i64 2 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_7_2_B_BLOCKWISE_COPY_SROA_7_10_LOAD6:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_7_2_PTR6_SROA_IDX]], align 2 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_10_0_B_BLOCKWISE_COPY_SROA_10_12_LOAD7:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_10]], align 4 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_10_2_PTR8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_10]], i64 2 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_10_2_B_BLOCKWISE_COPY_SROA_10_14_LOAD8:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_10_2_PTR8_SROA_IDX]], align 2 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i8> [[B_BLOCKWISE_COPY_SROA_0_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i8> [[B_BLOCKWISE_COPY_SROA_0_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i8> [[B_BLOCKWISE_COPY_SROA_4_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_SROA_0_2_VEC_EXTRACT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i8> [[B_BLOCKWISE_COPY_SROA_4_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_7_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i8> [[B_BLOCKWISE_COPY_SROA_7_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_7_SROA_0_2_VEC_EXTRACT:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i8> [[B_BLOCKWISE_COPY_SROA_7_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_10_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i8> [[B_BLOCKWISE_COPY_SROA_10_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_10_SROA_0_2_VEC_EXTRACT:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i8> [[B_BLOCKWISE_COPY_SROA_10_SROA_0_2_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SROA/tbaa-struct.ll b/llvm/test/Transforms/SROA/tbaa-struct.ll index 3e9332c5b11c0..3d55b72e91a60 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct.ll @@ -10,7 +10,8 @@ declare <2 x float> @foo(ptr %0) define void @bar(ptr %y2) { ; CHECK-LABEL: @bar( ; CHECK-NEXT: [[X14:%.*]] = call <2 x float> @foo(ptr [[Y2:%.*]]) -; CHECK-NEXT: store <2 x float> [[X14]], ptr [[Y2]], align 4, !tbaa.struct !0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[X14]] to <2 x i32> +; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[Y2]], align 4, !tbaa.struct !0 ; CHECK-NEXT: ret void ; %x7 = alloca %vector diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll index 1c81fc6163bbc..e7d5f4e74de52 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct2.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll @@ -9,12 +9,11 @@ declare double @subcall(double %g, i32 %m) define double @bar(ptr %wishart) { ; CHECK-LABEL: @bar( -; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4 ; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa.struct !0 ; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8 ; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa.struct !7 ; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct !8 +; CHECK-NEXT: [[TMP_SROA_3_SROA_0_0_COPYLOAD:%.*]] = load <4 x i8>, ptr [[TMP_SROA_3_0_WISHART_SROA_IDX]], align 4, !tbaa.struct !8 ; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]]) ; CHECK-NEXT: ret double [[CALL]] ; diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll index bdf50ea39af33..569dd05918332 100644 --- a/llvm/test/Transforms/SROA/vector-promotion.ll +++ b/llvm/test/Transforms/SROA/vector-promotion.ll @@ -567,9 +567,9 @@ define <4 x float> @test12(<4 x i32> %val) { define void @swap-8bytes(ptr %x, ptr %y) { ; CHECK-LABEL: @swap-8bytes( -; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[X:%.*]], align 1 +; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <8 x i8>, ptr [[X:%.*]], align 1 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 8, i1 false) -; CHECK-NEXT: store i64 [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1 +; CHECK-NEXT: store <8 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1 ; CHECK-NEXT: ret void ; %tmp = alloca [2 x i32] @@ -581,10 +581,9 @@ define void @swap-8bytes(ptr %x, ptr %y) { define void @swap-7bytes(ptr %x, ptr %y) { ; CHECK-LABEL: @swap-7bytes( -; CHECK-NEXT: [[TMP:%.*]] = alloca [7 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 7, i1 false) +; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr [[X:%.*]], align 1 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 7, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 7, i1 false) +; CHECK-NEXT: store <7 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1 ; CHECK-NEXT: ret void ; %tmp = alloca [7 x i8] @@ -596,10 +595,9 @@ define void @swap-7bytes(ptr %x, ptr %y) { define void @swap-16bytes(ptr %x, ptr %y) { ; CHECK-LABEL: @swap-16bytes( -; CHECK-NEXT: [[TMP:%.*]] = alloca [2 x i64], align 8 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 16, i1 false) +; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[X:%.*]], align 1 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 16, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 16, i1 false) +; CHECK-NEXT: store <16 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1 ; CHECK-NEXT: ret void ; %tmp = alloca [2 x i64] @@ -611,10 +609,9 @@ define void @swap-16bytes(ptr %x, ptr %y) { define void @swap-15bytes(ptr %x, ptr %y) { ; CHECK-LABEL: @swap-15bytes( -; CHECK-NEXT: [[TMP:%.*]] = alloca [15 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 15, i1 false) +; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <15 x i8>, ptr [[X:%.*]], align 1 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 15, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 15, i1 false) +; CHECK-NEXT: store <15 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1 ; CHECK-NEXT: ret void ; %tmp = alloca [15 x i8]