diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index ddc3d7eaef8a7..dbf4020c61270 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -121,6 +121,14 @@ static cl::opt SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), /// Disable running mem2reg during SROA in order to test or debug SROA. static cl::opt SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden); + +/// The maximum number of alloca slices allowed when splitting. +static cl::opt + SROAMaxAllocaSlices("sroa-max-alloca-slices", cl::init(1024), + cl::desc("Maximum number of alloca slices allowed " + "after which splitting is not attempted"), + cl::Hidden); + namespace { /// Calculate the fragment of a variable to use when slicing a store @@ -4961,6 +4969,9 @@ SROAPass::runOnAlloca(AllocaInst &AI) { if (AS.isEscaped()) return {Changed, CFGChanged}; + if (std::distance(AS.begin(), AS.end()) > SROAMaxAllocaSlices) + return {Changed, CFGChanged}; + // Delete all the dead users of this alloca before splitting and rewriting it. for (Instruction *DeadUser : AS.getDeadUsers()) { // Free up everything used by this instruction. diff --git a/llvm/test/Transforms/SROA/huge-size.ll b/llvm/test/Transforms/SROA/huge-size.ll new file mode 100644 index 0000000000000..14102920c0888 --- /dev/null +++ b/llvm/test/Transforms/SROA/huge-size.ll @@ -0,0 +1,174 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -passes='loop-unroll,sroa' --sroa-max-alloca-slices=16 -S -o - < %s | FileCheck %s + +; (Very) Reduced from this Rust code: +; +; extern "C" { +; fn _use(x: [[[Option::; 5]; 5]; 5]) -> bool; +; } +; fn main() { +; let s = [[[Option::::None; 5]; 5]; 5]; +; unsafe { +; _use(s); +; } +; } + +define void @huge_size() { +; CHECK-LABEL: define void @huge_size() { +; CHECK-NEXT: start: +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [5 x [5 x [5 x { i64, i64 }]]], align 8 +; CHECK-NEXT: [[ARRAY_SUB_1:%.*]] = alloca [5 x [5 x { i64, i64 }]], align 8 +; CHECK-NEXT: br label [[LOOP_1:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: br label [[LOOP_2:%.*]] +; CHECK: loop.2: +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_1]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 8 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_ARRAY_SUB_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 16 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_ARRAY_SUB_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 24 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_ARRAY_SUB_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 32 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_ARRAY_SUB_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 40 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_ARRAY_SUB_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 48 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_ARRAY_SUB_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 56 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_ARRAY_SUB_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 64 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_ARRAY_SUB_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 72 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_ARRAY_SUB_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[GEP_2_1:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 1 +; CHECK-NEXT: store i64 0, ptr [[GEP_2_1]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 8 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 16 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 24 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 32 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 40 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 48 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 56 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 64 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 72 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[GEP_2_2:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 2 +; CHECK-NEXT: store i64 0, ptr [[GEP_2_2]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 8 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_2_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 16 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_2_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 24 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_2_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 32 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_2_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 40 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_2_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 48 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_2_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 56 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_2_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 64 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_2_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 72 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_2_SROA_IDX]], align 1 +; CHECK-NEXT: [[GEP_2_3:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 3 +; CHECK-NEXT: store i64 0, ptr [[GEP_2_3]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 8 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_3_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 16 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_3_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 24 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_3_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 32 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_3_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 40 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_3_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 48 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_3_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 56 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_3_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 64 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_3_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 72 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_3_SROA_IDX]], align 1 +; CHECK-NEXT: [[GEP_2_4:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 4 +; CHECK-NEXT: store i64 0, ptr [[GEP_2_4]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 8 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_4_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 16 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_4_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 24 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_4_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 32 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_4_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 40 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_4_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 48 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_4_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 56 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_4_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 64 +; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_4_SROA_IDX]], align 1 +; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 72 +; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_4_SROA_IDX]], align 1 +; CHECK-NEXT: br label [[LOOP_3:%.*]] +; CHECK: loop.3: +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ARRAY]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false) +; CHECK-NEXT: [[GEP_3_1:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_1]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false) +; CHECK-NEXT: [[GEP_3_2:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_2]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false) +; CHECK-NEXT: [[GEP_3_3:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 3 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_3]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false) +; CHECK-NEXT: [[GEP_3_4:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 4 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_4]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false) +; CHECK-NEXT: ret void +; +start: + %array = alloca [5 x [5 x [5 x { i64, i64 }]]], align 8 + %array.sub.1 = alloca [5 x [5 x { i64, i64 }]], align 8 + %array.sub.2 = alloca [5 x { i64, i64 }], align 8 + br label %loop.1 + +; Set up %array.sub.2 +loop.1: + %ind.1 = phi i64 [ 0, %start ], [ %ind.1.next, %loop.1 ] + %gep.1 = getelementptr [5 x { i64, i64 }], ptr %array.sub.2, i64 0, i64 %ind.1 + store i64 0, ptr %gep.1, align 8 + %ind.1.next = add i64 %ind.1, 1 + %loop1.cond = icmp ult i64 %ind.1.next, 5 + br i1 %loop1.cond, label %loop.1, label %loop.2 + +; Set up %array.sub.1 +loop.2: + %ind.2 = phi i64 [ 0, %loop.1 ], [ %ind.2.next, %loop.2 ] + %gep.2 = getelementptr [5 x [5 x { i64, i64 }]], ptr %array.sub.1, i64 0, i64 %ind.2 + call void @llvm.memcpy.p0.p0.i64(ptr %gep.2, ptr %array.sub.2, i64 160, i1 false) + %ind.2.next = add i64 %ind.2, 1 + %loop.2.cond = icmp ult i64 %ind.2.next, 5 + br i1 %loop.2.cond, label %loop.2, label %loop.3 + +; Set up %array +loop.3: + %ind.3 = phi i64 [ 0, %loop.2 ], [ %ind.3.next, %loop.3 ] + %gep.3 = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr %array, i64 0, i64 %ind.3 + call void @llvm.memcpy.p0.p0.i64(ptr %gep.3, ptr %array.sub.1, i64 1600, i1 false) + %ind.3.next = add i64 %ind.3, 1 + %loop.3.cond = icmp ult i64 %ind.3.next, 5 + br i1 %loop.3.cond, label %loop.3, label %exit + +exit: + ret void +} + +declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)