Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions clang/test/CodeGenCXX/auto-var-init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ struct nullinit { char* null = nullptr; };
// ZERO-O0: @__const.test_padded_custom.custom = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, align 4
// PATTERN-O1-NOT: @__const.test_padded_uninit.uninit
// PATTERN-O1-NOT: @__const.test_padded_custom.custom
// ZERO-O1-NOT: @__const.test_padded_custom.custom
// ZERO-O1: @__const.test_padded_custom.custom = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, align 8
struct padded { char c; int i; };
// PATTERN-O0: @__const.test_paddednullinit_uninit.uninit = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, align 4
// PATTERN-O0: @__const.test_paddednullinit_braces.braces = private unnamed_addr constant { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, align 4
Expand Down Expand Up @@ -713,6 +713,13 @@ TEST_CUSTOM(padded, padded, { 42, 13371337 });
// CHECK-NEXT: call void @llvm.memcpy
// CHECK-NOT: !annotation
// CHECK-NEXT: call void @{{.*}}used{{.*}}%custom)
// ZERO-O1: %custom = alloca %struct.padded, align 4
// ZERO-O1: %0 = load %struct.padded, ptr @__const.test_padded_custom.custom, align 8
// ZERO-O1: %[[I8:.*]] = extractvalue %struct.padded %0, 0
// ZERO-O1: store i8 %[[I8]], ptr %custom, align 4
// ZERO-O1: %[[I32:.*]] = extractvalue %struct.padded %0, 1
// ZERO-O1: %[[GEP:.*]] = getelementptr inbounds nuw i8, ptr %custom, i64 4
// ZERO-O1: store i32 %[[I32]], ptr %[[GEP]], align 4

TEST_UNINIT(paddednullinit, paddednullinit);
// CHECK-LABEL: @test_paddednullinit_uninit()
Expand Down Expand Up @@ -1298,7 +1305,9 @@ TEST_CUSTOM(semivolatile, semivolatile, { 0x44444444, 0x44444444 });
// PATTERN-O1: store i32 1145324612, ptr %custom, align 4
// PATTERN-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr %custom, i64 4
// PATTERN-O1-NEXT: store i32 1145324612, ptr %[[I]], align 4
// ZERO-O1: store i64 4919131752989213764, ptr %custom, align 8
// ZERO-O1: store i32 1145324612, ptr %custom, align 4
// ZERO-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr %custom, i64 4
// ZERO-O1-NEXT: store i32 1145324612, ptr %[[I]], align 4
// CHECK-NOT: !annotation

TEST_UNINIT(semivolatileinit, semivolatileinit);
Expand Down Expand Up @@ -1441,7 +1450,7 @@ TEST_CUSTOM(matchingreverse, matchingreverse, { .i = 0xf00f });
// CHECK-NOT: !annotation
// CHECK-O0: call void @{{.*}}used{{.*}}%custom)
// PATTERN-O1: store i32 61455, ptr %custom, align 4
// ZERO-O1: store i32 61455, ptr %custom, align 4
// ZERO-O1: store float 0x379E01E000000000, ptr %custom, align 4
// CHECK-NOT: !annotation

TEST_UNINIT(unmatched, unmatched);
Expand Down Expand Up @@ -1527,7 +1536,7 @@ TEST_CUSTOM(unmatchedfp, unmatchedfp, { .d = 3.1415926535897932384626433 });
// CHECK-NOT: !annotation
// CHECK-O0: call void @{{.*}}used{{.*}}%custom)
// PATTERN-O1: store double 0x400921FB54442D18, ptr %custom, align 8
// ZERO-O1: store i64 4614256656552045848, ptr %custom, align 8
// ZERO-O1: store double 0x400921FB54442D18, ptr %custom, align 8
// CHECK-NOT: !annotation

TEST_UNINIT(emptyenum, emptyenum);
Expand Down
35 changes: 31 additions & 4 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,31 @@ static bool hasUndefSource(AnyMemTransferInst *MI) {
return isa<AllocaInst>(Src) && Src->hasOneUse();
}

// Try to pick a more descriptive type than a plain integer for the load/store
// pair that replaces a memory transfer, based on the allocas (if any) that
// Src and Dest refer to.
//
// The allocated type of a Src alloca takes precedence; the allocated type of a
// Dest alloca is only considered when Src is not an alloca. The candidate is
// accepted only when its size in bits (according to DL) equals the size of
// DefaultTy. In every other case DefaultTy is returned unchanged.
static Type *inferType(const DataLayout &DL, IntegerType *DefaultTy, Value *Src,
                       Value *Dest) {
  Type *Candidate = nullptr;

  if (auto *SrcAlloca = dyn_cast<AllocaInst>(Src))
    Candidate = SrcAlloca->getAllocatedType();
  else if (auto *DestAlloca = dyn_cast<AllocaInst>(Dest))
    Candidate = DestAlloca->getAllocatedType();

  // Neither operand is an alloca: nothing to infer from.
  if (!Candidate)
    return DefaultTy;

  // Only substitute a type that covers exactly the bytes being copied.
  if (DefaultTy->getPrimitiveSizeInBits() != DL.getTypeSizeInBits(Candidate))
    return DefaultTy;

  return Candidate;
}

Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
MaybeAlign CopyDstAlign = MI->getDestAlign();
Expand Down Expand Up @@ -169,16 +194,18 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
if (*CopyDstAlign < Size || *CopySrcAlign < Size)
return nullptr;

// Use an integer load+store unless we can find something better.
IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);

// If the memcpy has metadata describing the members, see if we can get the
// TBAA, scope and noalias tags describing our copy.
AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);

Value *Src = MI->getArgOperand(1);
Value *Dest = MI->getArgOperand(0);
LoadInst *L = Builder.CreateLoad(IntType, Src);

// Use an integer load+store unless we can find something better.
IntegerType *IntType = IntegerType::get(MI->getContext(), Size << 3);
Type *InferredType = inferType(DL, IntType, Src, Dest);

LoadInst *L = Builder.CreateLoad(InferredType, Src);
// Alignment from the mem intrinsic will be better, so use it.
L->setAlignment(*CopySrcAlign);
L->setAAMetadata(AACopyMD);
Expand Down
28 changes: 20 additions & 8 deletions llvm/test/Transforms/InstCombine/alloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -189,24 +189,36 @@ define void @test9(ptr %a) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, align 1
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4
; CHECK-NEXT: store i64 [[TMP0]], ptr [[ARGMEM]], align 4
; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load i32, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 4
; CHECK-NEXT: [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], align 4
; CHECK-NEXT: store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 4
; CHECK-NEXT: [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARGMEM]], i64 4
; CHECK-NEXT: store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], align 4
; CHECK-NEXT: call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] }>) [[ARGMEM]])
; CHECK-NEXT: ret void
;
; P32-LABEL: @test9(
; P32-NEXT: entry:
; P32-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, align 1
; P32-NEXT: [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4
; P32-NEXT: store i64 [[TMP0]], ptr [[ARGMEM]], align 4
; P32-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load i32, ptr [[A:%.*]], align 4
; P32-NEXT: [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 4
; P32-NEXT: [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], align 4
; P32-NEXT: store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 4
; P32-NEXT: [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARGMEM]], i32 4
; P32-NEXT: store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], align 4
; P32-NEXT: call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] }>) [[ARGMEM]])
; P32-NEXT: ret void
;
; NODL-LABEL: @test9(
; NODL-NEXT: entry:
; NODL-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, align 8
; NODL-NEXT: [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4
; NODL-NEXT: store i64 [[TMP0]], ptr [[ARGMEM]], align 8
; NODL-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load i32, ptr [[A:%.*]], align 4
; NODL-NEXT: [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 4
; NODL-NEXT: [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], align 4
; NODL-NEXT: store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 8
; NODL-NEXT: [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARGMEM]], i64 4
; NODL-NEXT: store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], align 4
; NODL-NEXT: call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] }>) [[ARGMEM]])
; NODL-NEXT: ret void
;
Expand Down Expand Up @@ -251,8 +263,8 @@ entry:

define void @test_inalloca_with_element_count(ptr %a) {
; ALL-LABEL: @test_inalloca_with_element_count(
; ALL-NEXT: [[ALLOCA1:%.*]] = alloca inalloca [10 x %struct_type], align 4
; ALL-NEXT: call void @test9_aux(ptr nonnull inalloca([[STRUCT_TYPE:%.*]]) [[ALLOCA1]])
; ALL-NEXT: [[ALLOCA1:%.*]] = alloca inalloca [10 x [[STRUCT_TYPE:%.*]]], align 4
; ALL-NEXT: call void @test9_aux(ptr nonnull inalloca([[STRUCT_TYPE]]) [[ALLOCA1]])
; ALL-NEXT: ret void
;
%alloca = alloca inalloca %struct_type, i32 10, align 4
Expand Down
59 changes: 59 additions & 0 deletions llvm/test/Transforms/InstCombine/memcpy_alloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,63 @@ define void @test6(ptr %dest) {
ret void
}

; Infer the type of the generated load/store when possible from an alloca
;
; Both copies below transfer all 8 bytes of the [2 x i32] temporary. The
; expected output contains no alloca and moves the data from %src to %dest as
; two separate i32 load/store pairs (offsets 0 and 4).

define void @test7(ptr %src, ptr %dest) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: %[[UNPACK0:.*]] = load i32, ptr %src, align 1
; CHECK-NEXT: %[[SRC_GEP:.*]] = getelementptr inbounds nuw i8, ptr %src, i64 4
; CHECK-NEXT: %[[UNPACK1:.*]] = load i32, ptr %[[SRC_GEP]], align 1
; CHECK-NEXT: store i32 %[[UNPACK0]], ptr %dest, align 1
; CHECK-NEXT: %[[DEST_GEP:.*]] = getelementptr inbounds nuw i8, ptr %dest, i64 4
; CHECK-NEXT: store i32 %[[UNPACK1]], ptr %[[DEST_GEP]], align 1
; CHECK-NEXT: ret void
;
%temp = alloca [2 x i32], align 4
call void @llvm.memcpy.p0.p0.i32(ptr %temp, ptr %src, i32 8, i1 false)
call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %temp, i32 8, i1 false)

ret void
}

; Ensure we don't use the alloca type if only partially copying
;
; The temporary is an 8-byte [2 x i32], but each copy moves only 4 bytes, so
; the alloca's type does not match the copy size. The expected output is a
; single i32 load/store from %src to %dest.

define void @test8(ptr %src, ptr %dest) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: %[[LI:.*]] = load i32, ptr %src, align 1
; CHECK-NEXT: store i32 %[[LI]], ptr %dest, align 1
; CHECK-NEXT: ret void
;
%temp = alloca [2 x i32], align 4
call void @llvm.memcpy.p0.p0.i32(ptr %temp, ptr %src, i32 4, i1 false)
call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %temp, i32 4, i1 false)

ret void
}

; Ensure we don't use alloca type if they don't agree
;
; Here the source of the second copy is a [2 x i32] alloca while its
; destination is a double alloca of the same size. The expected output writes
; into the double alloca as two i32 values (matching the source alloca), and
; only the final explicit load reads it back as a double.

define double @test9(ptr %src, ptr %dest) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: %[[TEMP:.*]] = alloca double, align 1
; CHECK-NEXT: %[[UNPACK0:.*]] = load i32, ptr %src, align 1
; CHECK-NEXT: %[[SRC_GEP:.*]] = getelementptr inbounds nuw i8, ptr %src, i64 4
; CHECK-NEXT: %[[UNPACK1:.*]] = load i32, ptr %[[SRC_GEP]], align 1
; CHECK-NEXT: store i32 %[[UNPACK0]], ptr %[[TEMP]], align 1
; CHECK-NEXT: %[[TEMP_GEP:.*]] = getelementptr inbounds nuw i8, ptr %[[TEMP]], i64 4
; CHECK-NEXT: store i32 %[[UNPACK1]], ptr %[[TEMP_GEP]], align 1
; CHECK-NEXT: %[[RES:.*]] = load double, ptr %[[TEMP]]
; CHECK-NEXT: ret double %[[RES]]
;
%temp = alloca [2 x i32], align 4
%out = alloca double, align 1

call void @llvm.memcpy.p0.p0.i32(ptr %temp, ptr %src, i32 8, i1 false)
call void @llvm.memcpy.p0.p0.i32(ptr %out, ptr %temp, i32 8, i1 false)

%res = load double, ptr %out

ret double %res
}

declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
Original file line number Diff line number Diff line change
Expand Up @@ -28,30 +28,27 @@ define dso_local void @foo(i32 noundef %arg, ptr noundef nonnull align 4 derefer
; CHECK-NEXT: [[ARG_OFF:%.*]] = add i32 [[ARG]], 127
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[ARG_OFF]], 255
; CHECK-NEXT: br i1 [[TMP0]], label %[[BB12:.*]], label %[[BB13:.*]]
; CHECK: [[BB12_LOOPEXIT:.*]]:
; CHECK-NEXT: [[I3_SROA_8_0_INSERT_EXT:%.*]] = zext i32 [[I21_3:%.*]] to i64
; CHECK-NEXT: [[I3_SROA_8_0_INSERT_SHIFT:%.*]] = shl nuw i64 [[I3_SROA_8_0_INSERT_EXT]], 32
; CHECK-NEXT: [[I3_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[I21_2:%.*]] to i64
; CHECK-NEXT: [[I3_SROA_0_0_INSERT_INSERT:%.*]] = or disjoint i64 [[I3_SROA_8_0_INSERT_SHIFT]], [[I3_SROA_0_0_INSERT_EXT]]
; CHECK-NEXT: br label %[[BB12]]
; CHECK: [[BB12]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], %[[BB12_LOOPEXIT]] ], [ 180388626456, %[[BB]] ]
; CHECK-NEXT: store i64 [[TMP1]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA5:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ <i32 24, i32 42>, %[[BB]] ], [ [[I3_SROA_0_4_VEC_INSERT33:%.*]], %[[BB13]] ]
; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA5:![0-9]+]]
; CHECK-NEXT: ret void
; CHECK: [[BB13]]:
; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], %[[BB13]] ], [ 42, %[[BB]] ]
; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], %[[BB13]] ], [ 24, %[[BB]] ]
; CHECK-NEXT: [[I3_SROA_0_1:%.*]] = phi <2 x i32> [ [[I3_SROA_0_4_VEC_INSERT33]], %[[BB13]] ], [ <i32 24, i32 42>, %[[BB]] ]
; CHECK-NEXT: [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], %[[BB13]] ], [ 0, %[[BB]] ]
; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = extractelement <2 x i32> [[I3_SROA_0_1]], i64 0
; CHECK-NEXT: [[I21:%.*]] = mul nsw i32 [[I3_SROA_0_0]], [[I4_05]]
; CHECK-NEXT: [[I24:%.*]] = or disjoint i32 [[I4_05]], 1
; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = extractelement <2 x i32> [[I3_SROA_0_1]], i64 1
; CHECK-NEXT: [[I21_1:%.*]] = mul nsw i32 [[I3_SROA_8_0]], [[I24]]
; CHECK-NEXT: [[I24_1:%.*]] = or disjoint i32 [[I4_05]], 2
; CHECK-NEXT: [[I21_2]] = mul nsw i32 [[I21]], [[I24_1]]
; CHECK-NEXT: [[I21_2:%.*]] = mul nsw i32 [[I21]], [[I24_1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[I21_2]], i64 0
; CHECK-NEXT: [[I24_2:%.*]] = or disjoint i32 [[I4_05]], 3
; CHECK-NEXT: [[I21_3]] = mul nsw i32 [[I21_1]], [[I24_2]]
; CHECK-NEXT: [[I21_3:%.*]] = mul nsw i32 [[I21_1]], [[I24_2]]
; CHECK-NEXT: [[I3_SROA_0_4_VEC_INSERT33]] = insertelement <2 x i32> [[TMP1]], i32 [[I21_3]], i64 1
; CHECK-NEXT: [[I24_3]] = add nuw nsw i32 [[I4_05]], 4
; CHECK-NEXT: [[I11_NOT_3:%.*]] = icmp eq i32 [[I24_3]], [[I10]]
; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12_LOOPEXIT]], label %[[BB13]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12]], label %[[BB13]], !llvm.loop [[LOOP8:![0-9]+]]
;
bb:
%i = alloca i32, align 4
Expand Down
8 changes: 6 additions & 2 deletions llvm/test/Transforms/PhaseOrdering/swap-promotion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@

define void @swap(ptr %p1, ptr %p2) {
; CHECK-LABEL: @swap(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[P1:%.*]], align 1
; CHECK-NEXT: [[DOTUNPACK:%.*]] = load i32, ptr [[P1:%.*]], align 1
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 4
; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i32, ptr [[DOTELT1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[P2:%.*]], align 1
; CHECK-NEXT: store i64 [[TMP2]], ptr [[P1]], align 1
; CHECK-NEXT: store i64 [[TMP1]], ptr [[P2]], align 1
; CHECK-NEXT: store i32 [[DOTUNPACK]], ptr [[P2]], align 1
; CHECK-NEXT: [[P2_REPACK8:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 4
; CHECK-NEXT: store i32 [[DOTUNPACK2]], ptr [[P2_REPACK8]], align 1
; CHECK-NEXT: ret void
;
%tmp = alloca [2 x i32]
Expand Down