From 3d6137001d884c68f8b37838481875041666d092 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Fri, 28 Nov 2025 11:09:00 -0800 Subject: [PATCH 1/3] [InstCombine] Optimistically infer load/store type from memcpy --- clang/test/CodeGenCXX/auto-var-init.cpp | 17 +++++++-- .../InstCombine/InstCombineCalls.cpp | 37 +++++++++++++++++-- llvm/test/Transforms/InstCombine/alloca.ll | 28 ++++++++++---- .../X86/SROA-after-final-loop-unrolling-2.ll | 23 +++++------- .../PhaseOrdering/swap-promotion.ll | 8 +++- 5 files changed, 82 insertions(+), 31 deletions(-) diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp index 67bc5d417bce9..bc7290b345651 100644 --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -79,7 +79,7 @@ struct nullinit { char* null = nullptr; }; // ZERO-O0: @__const.test_padded_custom.custom = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, align 4 // PATTERN-O1-NOT: @__const.test_padded_uninit.uninit // PATTERN-O1-NOT: @__const.test_padded_custom.custom -// ZERO-O1-NOT: @__const.test_padded_custom.custom +// ZERO-O1: @__const.test_padded_custom.custom = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, align 8 struct padded { char c; int i; }; // PATTERN-O0: @__const.test_paddednullinit_uninit.uninit = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, align 4 // PATTERN-O0: @__const.test_paddednullinit_braces.braces = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, align 4 @@ -713,6 +713,13 @@ TEST_CUSTOM(padded, padded, { 42, 13371337 }); // CHECK-NEXT: call void @llvm.memcpy // CHECK-NOT: !annotation // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) +// ZERO-O1: %custom = alloca %struct.padded, align 4 +// ZERO-O1: %0 = load
%struct.padded, ptr @__const.test_padded_custom.custom, align 8 +// ZERO-O1: %[[I8:.*]] = extractvalue %struct.padded %0, 0 +// ZERO-O1: store i8 %[[I8]], ptr %custom, align 4 +// ZERO-O1: %[[I32:.*]] = extractvalue %struct.padded %0, 1 +// ZERO-O1: %[[GEP:.*]] = getelementptr inbounds nuw i8, ptr %custom, i64 4 +// ZERO-O1: store i32 %[[I32]], ptr %[[GEP]], align 4 TEST_UNINIT(paddednullinit, paddednullinit); // CHECK-LABEL: @test_paddednullinit_uninit() @@ -1298,7 +1305,9 @@ TEST_CUSTOM(semivolatile, semivolatile, { 0x44444444, 0x44444444 }); // PATTERN-O1: store i32 1145324612, ptr %custom, align 4 // PATTERN-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr %custom, i64 4 // PATTERN-O1-NEXT: store i32 1145324612, ptr %[[I]], align 4 -// ZERO-O1: store i64 4919131752989213764, ptr %custom, align 8 +// ZERO-O1: store i32 1145324612, ptr %custom, align 4 +// ZERO-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr %custom, i64 4 +// ZERO-O1-NEXT: store i32 1145324612, ptr %[[I]], align 4 // CHECK-NOT: !annotation TEST_UNINIT(semivolatileinit, semivolatileinit); @@ -1441,7 +1450,7 @@ TEST_CUSTOM(matchingreverse, matchingreverse, { .i = 0xf00f }); // CHECK-NOT: !annotation // CHECK-O0: call void @{{.*}}used{{.*}}%custom) // PATTERN-O1: store i32 61455, ptr %custom, align 4 -// ZERO-O1: store i32 61455, ptr %custom, align 4 +// ZERO-O1: store float 0x379E01E000000000, ptr %custom, align 4 // CHECK-NOT: !annotation TEST_UNINIT(unmatched, unmatched); @@ -1527,7 +1536,7 @@ TEST_CUSTOM(unmatchedfp, unmatchedfp, { .d = 3.1415926535897932384626433 }); // CHECK-NOT: !annotation // CHECK-O0: call void @{{.*}}used{{.*}}%custom) // PATTERN-O1: store double 0x400921FB54442D18, ptr %custom, align 8 -// ZERO-O1: store i64 4614256656552045848, ptr %custom, align 8 +// ZERO-O1: store double 0x400921FB54442D18, ptr %custom, align 8 // CHECK-NOT: !annotation TEST_UNINIT(emptyenum, emptyenum); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 92fca90ddb88a..7979540b8b437 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -114,6 +114,33 @@ static bool hasUndefSource(AnyMemTransferInst *MI) { return isa<AllocaInst>(Src) && Src->hasOneUse(); } +// Optimistically infer a type from either the Src or Dest. +// +// Returns the DefaultTy if unable to infer a type, if inferred types +// disagree, or, if inferred type does not match the size of load/store. +static Type *inferType(const DataLayout &DL, IntegerType *DefaultTy, Value *Src, + Value *Dest) { + Type *SrcTy = nullptr; + Type *DestTy = nullptr; + + if (auto *SrcAI = dyn_cast<AllocaInst>(Src)) + SrcTy = SrcAI->getAllocatedType(); + + if (auto *DestAI = dyn_cast<AllocaInst>(Dest)) + DestTy = DestAI->getAllocatedType(); + + if (SrcTy && DestTy && SrcTy != DestTy) + return DefaultTy; // Unable to infer common type + + Type *InferredTy = SrcTy ? SrcTy : DestTy; + + if (InferredTy && + DefaultTy->getPrimitiveSizeInBits() == DL.getTypeSizeInBits(InferredTy)) + return InferredTy; + + return DefaultTy; +} + Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT); MaybeAlign CopyDstAlign = MI->getDestAlign(); @@ -169,16 +196,18 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { if (*CopyDstAlign < Size || *CopySrcAlign < Size) return nullptr; - // Use an integer load+store unless we can find something better. - IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); - // If the memcpy has metadata describing the members, see if we can get the // TBAA, scope and noalias tags describing our copy.
AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size); Value *Src = MI->getArgOperand(1); Value *Dest = MI->getArgOperand(0); - LoadInst *L = Builder.CreateLoad(IntType, Src); + + // Use an integer load+store unless we can find something better. + IntegerType *IntType = IntegerType::get(MI->getContext(), Size << 3); + Type *InferredType = inferType(DL, IntType, Src, Dest); + + LoadInst *L = Builder.CreateLoad(InferredType, Src); // Alignment from the mem intrinsic will be better, so use it. L->setAlignment(*CopySrcAlign); L->setAAMetadata(AACopyMD); diff --git a/llvm/test/Transforms/InstCombine/alloca.ll b/llvm/test/Transforms/InstCombine/alloca.ll index 15b43e780f149..de8543e333e54 100644 --- a/llvm/test/Transforms/InstCombine/alloca.ll +++ b/llvm/test/Transforms/InstCombine/alloca.ll @@ -189,24 +189,36 @@ define void @test9(ptr %a) { ; CHECK-LABEL: @test9( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, align 1 -; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4 -; CHECK-NEXT: store i64 [[TMP0]], ptr [[ARGMEM]], align 4 +; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load i32, ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 4 +; CHECK-NEXT: [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], align 4 +; CHECK-NEXT: store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 4 +; CHECK-NEXT: [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARGMEM]], i64 4 +; CHECK-NEXT: store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], align 4 ; CHECK-NEXT: call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] }>) [[ARGMEM]]) ; CHECK-NEXT: ret void ; ; P32-LABEL: @test9( ; P32-NEXT: entry: ; P32-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, align 1 -; P32-NEXT: [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4 -; P32-NEXT: store i64 [[TMP0]], ptr [[ARGMEM]], align 4 +; P32-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load 
i32, ptr [[A:%.*]], align 4 +; P32-NEXT: [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 4 +; P32-NEXT: [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], align 4 +; P32-NEXT: store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 4 +; P32-NEXT: [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARGMEM]], i32 4 +; P32-NEXT: store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], align 4 ; P32-NEXT: call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] }>) [[ARGMEM]]) ; P32-NEXT: ret void ; ; NODL-LABEL: @test9( ; NODL-NEXT: entry: ; NODL-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, align 8 -; NODL-NEXT: [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4 -; NODL-NEXT: store i64 [[TMP0]], ptr [[ARGMEM]], align 8 +; NODL-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load i32, ptr [[A:%.*]], align 4 +; NODL-NEXT: [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 4 +; NODL-NEXT: [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], align 4 +; NODL-NEXT: store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 8 +; NODL-NEXT: [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARGMEM]], i64 4 +; NODL-NEXT: store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], align 4 ; NODL-NEXT: call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] }>) [[ARGMEM]]) ; NODL-NEXT: ret void ; @@ -251,8 +263,8 @@ entry: define void @test_inalloca_with_element_count(ptr %a) { ; ALL-LABEL: @test_inalloca_with_element_count( -; ALL-NEXT: [[ALLOCA1:%.*]] = alloca inalloca [10 x %struct_type], align 4 -; ALL-NEXT: call void @test9_aux(ptr nonnull inalloca([[STRUCT_TYPE:%.*]]) [[ALLOCA1]]) +; ALL-NEXT: [[ALLOCA1:%.*]] = alloca inalloca [10 x [[STRUCT_TYPE:%.*]]], align 4 +; ALL-NEXT: call void @test9_aux(ptr nonnull inalloca([[STRUCT_TYPE]]) [[ALLOCA1]]) ; ALL-NEXT: ret void ; %alloca = alloca inalloca %struct_type, i32 10, align 4 diff --git 
a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll index f42101ffe89aa..e346ee8414052 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll @@ -28,30 +28,27 @@ define dso_local void @foo(i32 noundef %arg, ptr noundef nonnull align 4 derefer ; CHECK-NEXT: [[ARG_OFF:%.*]] = add i32 [[ARG]], 127 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[ARG_OFF]], 255 ; CHECK-NEXT: br i1 [[TMP0]], label %[[BB12:.*]], label %[[BB13:.*]] -; CHECK: [[BB12_LOOPEXIT:.*]]: -; CHECK-NEXT: [[I3_SROA_8_0_INSERT_EXT:%.*]] = zext i32 [[I21_3:%.*]] to i64 -; CHECK-NEXT: [[I3_SROA_8_0_INSERT_SHIFT:%.*]] = shl nuw i64 [[I3_SROA_8_0_INSERT_EXT]], 32 -; CHECK-NEXT: [[I3_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[I21_2:%.*]] to i64 -; CHECK-NEXT: [[I3_SROA_0_0_INSERT_INSERT:%.*]] = or disjoint i64 [[I3_SROA_8_0_INSERT_SHIFT]], [[I3_SROA_0_0_INSERT_EXT]] -; CHECK-NEXT: br label %[[BB12]] ; CHECK: [[BB12]]: -; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], %[[BB12_LOOPEXIT]] ], [ 180388626456, %[[BB]] ] -; CHECK-NEXT: store i64 [[TMP1]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA5:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ , %[[BB]] ], [ [[I3_SROA_0_4_VEC_INSERT33:%.*]], %[[BB13]] ] +; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA5:![0-9]+]] ; CHECK-NEXT: ret void ; CHECK: [[BB13]]: -; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], %[[BB13]] ], [ 42, %[[BB]] ] -; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], %[[BB13]] ], [ 24, %[[BB]] ] +; CHECK-NEXT: [[I3_SROA_0_1:%.*]] = phi <2 x i32> [ [[I3_SROA_0_4_VEC_INSERT33]], %[[BB13]] ], [ , %[[BB]] ] ; CHECK-NEXT: [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], %[[BB13]] ], [ 0, %[[BB]] ] +; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = extractelement <2 x i32> [[I3_SROA_0_1]], i64 
0 ; CHECK-NEXT: [[I21:%.*]] = mul nsw i32 [[I3_SROA_0_0]], [[I4_05]] ; CHECK-NEXT: [[I24:%.*]] = or disjoint i32 [[I4_05]], 1 +; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = extractelement <2 x i32> [[I3_SROA_0_1]], i64 1 ; CHECK-NEXT: [[I21_1:%.*]] = mul nsw i32 [[I3_SROA_8_0]], [[I24]] ; CHECK-NEXT: [[I24_1:%.*]] = or disjoint i32 [[I4_05]], 2 -; CHECK-NEXT: [[I21_2]] = mul nsw i32 [[I21]], [[I24_1]] +; CHECK-NEXT: [[I21_2:%.*]] = mul nsw i32 [[I21]], [[I24_1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[I21_2]], i64 0 ; CHECK-NEXT: [[I24_2:%.*]] = or disjoint i32 [[I4_05]], 3 -; CHECK-NEXT: [[I21_3]] = mul nsw i32 [[I21_1]], [[I24_2]] +; CHECK-NEXT: [[I21_3:%.*]] = mul nsw i32 [[I21_1]], [[I24_2]] +; CHECK-NEXT: [[I3_SROA_0_4_VEC_INSERT33]] = insertelement <2 x i32> [[TMP1]], i32 [[I21_3]], i64 1 ; CHECK-NEXT: [[I24_3]] = add nuw nsw i32 [[I4_05]], 4 ; CHECK-NEXT: [[I11_NOT_3:%.*]] = icmp eq i32 [[I24_3]], [[I10]] -; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12_LOOPEXIT]], label %[[BB13]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12]], label %[[BB13]], !llvm.loop [[LOOP8:![0-9]+]] ; bb: %i = alloca i32, align 4 diff --git a/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll b/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll index 059a122262786..1f9ec9b6ac5a1 100644 --- a/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll +++ b/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll @@ -5,10 +5,14 @@ define void @swap(ptr %p1, ptr %p2) { ; CHECK-LABEL: @swap( -; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[P1:%.*]], align 1 +; CHECK-NEXT: [[DOTUNPACK:%.*]] = load i32, ptr [[P1:%.*]], align 1 +; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 4 +; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i32, ptr [[DOTELT1]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[P2:%.*]], align 1 ; CHECK-NEXT: store i64 [[TMP2]], ptr [[P1]], align 1 -; CHECK-NEXT: store i64 [[TMP1]], ptr [[P2]], align 1 
+; CHECK-NEXT: store i32 [[DOTUNPACK]], ptr [[P2]], align 1 +; CHECK-NEXT: [[P2_REPACK8:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 4 +; CHECK-NEXT: store i32 [[DOTUNPACK2]], ptr [[P2_REPACK8]], align 1 ; CHECK-NEXT: ret void ; %tmp = alloca [2 x i32] From f43d6a47f55dfa59ac3017dce84e82869b1b5038 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Fri, 28 Nov 2025 14:02:23 -0800 Subject: [PATCH 2/3] test cases: use alloca type when applicable --- .../Transforms/InstCombine/memcpy_alloca.ll | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/memcpy_alloca.ll b/llvm/test/Transforms/InstCombine/memcpy_alloca.ll index b86066c2776e8..e02f659e34253 100644 --- a/llvm/test/Transforms/InstCombine/memcpy_alloca.ll +++ b/llvm/test/Transforms/InstCombine/memcpy_alloca.ll @@ -71,4 +71,38 @@ define void @test6(ptr %dest) { ret void } +; Infer the type of the generated load/store when possible from an alloca + +define void @test7(ptr %src, ptr %dest) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: %[[UNPACK0:.*]] = load i32, ptr %src, align 1 +; CHECK-NEXT: %[[SRC_GEP:.*]] = getelementptr inbounds nuw i8, ptr %src, i64 4 +; CHECK-NEXT: %[[UNPACK1:.*]] = load i32, ptr %[[SRC_GEP]], align 1 +; CHECK-NEXT: store i32 %[[UNPACK0]], ptr %dest, align 1 +; CHECK-NEXT: %[[DEST_GEP:.*]] = getelementptr inbounds nuw i8, ptr %dest, i64 4 +; CHECK-NEXT: store i32 %[[UNPACK1]], ptr %[[DEST_GEP]], align 1 +; CHECK-NEXT: ret void +; + %temp = alloca [2 x i32], align 4 + call void @llvm.memcpy.p0.p0.i32(ptr %temp, ptr %src, i32 8, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %temp, i32 8, i1 false) + + ret void +} + +; Ensure we don't use alloca type if only partially copying + +define void @test8(ptr %src, ptr %dest) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: %[[LI:.*]] = load i32, ptr %src, align 1 +; CHECK-NEXT: store i32 %[[LI]], ptr %dest, align 1 +; CHECK-NEXT: ret void +; + %temp = alloca [2 x i32], align 4 + call void
@llvm.memcpy.p0.p0.i32(ptr %temp, ptr %src, i32 4, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %temp, i32 4, i1 false) + + ret void +} + declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) From e2999764f59060f12be76e5b0de08d02be402c81 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Fri, 28 Nov 2025 15:12:29 -0800 Subject: [PATCH 3/3] test case: inferred different alloca types --- .../InstCombine/InstCombineCalls.cpp | 10 +++----- .../Transforms/InstCombine/memcpy_alloca.ll | 25 +++++++++++++++++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 7979540b8b437..b9c2a6681f687 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -114,10 +114,11 @@ static bool hasUndefSource(AnyMemTransferInst *MI) { return isa<AllocaInst>(Src) && Src->hasOneUse(); } -// Optimistically infer a type from either the Src or Dest. +// Optimistically infer a type from either the Src or Dest. Prefers the Src +// over the Dest type. // -// Returns the DefaultTy if unable to infer a type, if inferred types -// disagree, or, if inferred type does not match the size of load/store. +// Returns the DefaultTy if unable to infer a type, or, if inferred type does +// not match the size of load/store. static Type *inferType(const DataLayout &DL, IntegerType *DefaultTy, Value *Src, Value *Dest) { Type *SrcTy = nullptr; @@ -129,9 +130,6 @@ static Type *inferType(const DataLayout &DL, IntegerType *DefaultTy, Value *Src, if (auto *DestAI = dyn_cast<AllocaInst>(Dest)) DestTy = DestAI->getAllocatedType(); - if (SrcTy && DestTy && SrcTy != DestTy) - return DefaultTy; // Unable to infer common type - Type *InferredTy = SrcTy ?
SrcTy : DestTy; if (InferredTy && diff --git a/llvm/test/Transforms/InstCombine/memcpy_alloca.ll b/llvm/test/Transforms/InstCombine/memcpy_alloca.ll index e02f659e34253..fb795cbbadc78 100644 --- a/llvm/test/Transforms/InstCombine/memcpy_alloca.ll +++ b/llvm/test/Transforms/InstCombine/memcpy_alloca.ll @@ -105,4 +105,29 @@ define void @test8(ptr %src, ptr %dest) { ret void } +; Ensure we don't use alloca type if they don't agree + +define double @test9(ptr %src, ptr %dest) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: %[[TEMP:.*]] = alloca double, align 1 +; CHECK-NEXT: %[[UNPACK0:.*]] = load i32, ptr %src, align 1 +; CHECK-NEXT: %[[SRC_GEP:.*]] = getelementptr inbounds nuw i8, ptr %src, i64 4 +; CHECK-NEXT: %[[UNPACK1:.*]] = load i32, ptr %[[SRC_GEP]], align 1 +; CHECK-NEXT: store i32 %[[UNPACK0]], ptr %[[TEMP]], align 1 +; CHECK-NEXT: %[[TEMP_GEP:.*]] = getelementptr inbounds nuw i8, ptr %[[TEMP]], i64 4 +; CHECK-NEXT: store i32 %[[UNPACK1]], ptr %[[TEMP_GEP]], align 1 +; CHECK-NEXT: %[[RES:.*]] = load double, ptr %[[TEMP]] +; CHECK-NEXT: ret double %[[RES]] +; + %temp = alloca [2 x i32], align 4 + %out = alloca double, align 1 + + call void @llvm.memcpy.p0.p0.i32(ptr %temp, ptr %src, i32 8, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %out, ptr %temp, i32 8, i1 false) + + %res = load double, ptr %out + + ret double %res +} + declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)