Skip to content

Commit

Permalink
[SROA] Limit the number of allowed slices when trying to split allocas
Browse files Browse the repository at this point in the history
This patch adds a hidden CLI option "--sroa-max-alloca-slices", which is
an integer that controls the maximum number of alloca slices SROA can
consider before bailing out. This is useful because it may not be
profitable to split memcpys into (possibly tens of) thousands of loads/stores.
This also prevents an issue with exponential compile time explosion in passes
like DSE and MemCpyOpt caused by excessive alloca splitting.

Fixes rust-lang/rust#88580.

Differential Revision: https://reviews.llvm.org/D159354
  • Loading branch information
dc03 committed Sep 9, 2023
1 parent f807494 commit e13e808
Show file tree
Hide file tree
Showing 2 changed files with 185 additions and 0 deletions.
11 changes: 11 additions & 0 deletions llvm/lib/Transforms/Scalar/SROA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,14 @@ static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
/// Disable running mem2reg during SROA in order to test or debug SROA.
static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
cl::Hidden);

/// The maximum number of alloca slices allowed when splitting.
static cl::opt<int>
SROAMaxAllocaSlices("sroa-max-alloca-slices", cl::init(1024),
cl::desc("Maximum number of alloca slices allowed "
"after which splitting is not attempted"),
cl::Hidden);

namespace {

/// Calculate the fragment of a variable to use when slicing a store
Expand Down Expand Up @@ -4961,6 +4969,9 @@ SROAPass::runOnAlloca(AllocaInst &AI) {
if (AS.isEscaped())
return {Changed, CFGChanged};

if (std::distance(AS.begin(), AS.end()) > SROAMaxAllocaSlices)
return {Changed, CFGChanged};

// Delete all the dead users of this alloca before splitting and rewriting it.
for (Instruction *DeadUser : AS.getDeadUsers()) {
// Free up everything used by this instruction.
Expand Down
174 changes: 174 additions & 0 deletions llvm/test/Transforms/SROA/huge-size.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -passes='loop-unroll,sroa' --sroa-max-alloca-slices=16 -S -o - < %s | FileCheck %s

; (Very) Reduced from this Rust code:
;
; extern "C" {
; fn _use(x: [[[Option::<usize>; 5]; 5]; 5]) -> bool;
; }
; fn main() {
; let s = [[[Option::<usize>::None; 5]; 5]; 5];
; unsafe {
; _use(s);
; }
; }

define void @huge_size() {
; CHECK-LABEL: define void @huge_size() {
; CHECK-NEXT: start:
; CHECK-NEXT: [[ARRAY:%.*]] = alloca [5 x [5 x [5 x { i64, i64 }]]], align 8
; CHECK-NEXT: [[ARRAY_SUB_1:%.*]] = alloca [5 x [5 x { i64, i64 }]], align 8
; CHECK-NEXT: br label [[LOOP_1:%.*]]
; CHECK: loop.1:
; CHECK-NEXT: br label [[LOOP_2:%.*]]
; CHECK: loop.2:
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_1]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 8
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 16
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 24
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 32
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 40
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 48
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 56
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 64
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 72
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_ARRAY_SUB_1_SROA_IDX]], align 1
; CHECK-NEXT: [[GEP_2_1:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 1
; CHECK-NEXT: store i64 0, ptr [[GEP_2_1]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 8
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 16
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 24
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 32
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 40
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 48
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 56
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 64
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 72
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_1_SROA_IDX]], align 1
; CHECK-NEXT: [[GEP_2_2:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 2
; CHECK-NEXT: store i64 0, ptr [[GEP_2_2]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 8
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 16
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 24
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 32
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 40
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 48
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 56
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 64
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 72
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_2_SROA_IDX]], align 1
; CHECK-NEXT: [[GEP_2_3:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 3
; CHECK-NEXT: store i64 0, ptr [[GEP_2_3]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 8
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 16
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 24
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 32
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 40
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 48
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 56
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 64
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 72
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_3_SROA_IDX]], align 1
; CHECK-NEXT: [[GEP_2_4:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 4
; CHECK-NEXT: store i64 0, ptr [[GEP_2_4]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 8
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 16
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 24
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 32
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 40
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 48
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 56
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 64
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 72
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_4_SROA_IDX]], align 1
; CHECK-NEXT: br label [[LOOP_3:%.*]]
; CHECK: loop.3:
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ARRAY]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false)
; CHECK-NEXT: [[GEP_3_1:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_1]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false)
; CHECK-NEXT: [[GEP_3_2:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 2
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_2]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false)
; CHECK-NEXT: [[GEP_3_3:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 3
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_3]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false)
; CHECK-NEXT: [[GEP_3_4:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 4
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_4]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false)
; CHECK-NEXT: ret void
;
start:
%array = alloca [5 x [5 x [5 x { i64, i64 }]]], align 8
%array.sub.1 = alloca [5 x [5 x { i64, i64 }]], align 8
%array.sub.2 = alloca [5 x { i64, i64 }], align 8
br label %loop.1

; Set up %array.sub.2
loop.1:
%ind.1 = phi i64 [ 0, %start ], [ %ind.1.next, %loop.1 ]
%gep.1 = getelementptr [5 x { i64, i64 }], ptr %array.sub.2, i64 0, i64 %ind.1
store i64 0, ptr %gep.1, align 8
%ind.1.next = add i64 %ind.1, 1
%loop1.cond = icmp ult i64 %ind.1.next, 5
br i1 %loop1.cond, label %loop.1, label %loop.2

; Set up %array.sub.1
loop.2:
%ind.2 = phi i64 [ 0, %loop.1 ], [ %ind.2.next, %loop.2 ]
%gep.2 = getelementptr [5 x [5 x { i64, i64 }]], ptr %array.sub.1, i64 0, i64 %ind.2
call void @llvm.memcpy.p0.p0.i64(ptr %gep.2, ptr %array.sub.2, i64 160, i1 false)
%ind.2.next = add i64 %ind.2, 1
%loop.2.cond = icmp ult i64 %ind.2.next, 5
br i1 %loop.2.cond, label %loop.2, label %loop.3

; Set up %array
loop.3:
%ind.3 = phi i64 [ 0, %loop.2 ], [ %ind.3.next, %loop.3 ]
%gep.3 = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr %array, i64 0, i64 %ind.3
call void @llvm.memcpy.p0.p0.i64(ptr %gep.3, ptr %array.sub.1, i64 1600, i1 false)
%ind.3.next = add i64 %ind.3, 1
%loop.3.cond = icmp ult i64 %ind.3.next, 5
br i1 %loop.3.cond, label %loop.3, label %exit

exit:
ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

3 comments on commit e13e808

@alexfh
Copy link
Contributor

@alexfh alexfh commented on e13e808 Sep 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, a heads up: this commit has caused increased stack or local memory usage for some CUDA kernels. We're trying to come up with an isolated test case.

@alexfh
Copy link
Contributor

@alexfh alexfh commented on e13e808 Sep 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, a heads up: this commit has caused increased stack or local memory usage for some CUDA kernels. We're trying to come up with an isolated test case.

False alarm. It's just that the default value of --sroa-max-alloca-slices is too small for some code now. Increased this locally to around 3000 and it's working as previously.

@nikic
Copy link
Contributor

@nikic nikic commented on e13e808 Sep 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, if 1000 is too low for GPU targets, we might need a smarter heuristic here. (Something that takes into account whether we end up splitting a single memset/memcpy into a huge number of load/stores or not.)

Please sign in to comment.