-
Notifications
You must be signed in to change notification settings - Fork 11.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SROA] Limit the number of allowed slices when trying to split allocas
This patch adds a hidden CLI option "--sroa-max-alloca-slices", which is an integer that controls the maximum number of alloca slices SROA can consider before bailing out. This is useful because it may not be profitable to split memcpys into (possibly tens of) thousands of loads/stores. This also prevents an issue with exponential compile time explosion in passes like DSE and MemCpyOpt caused by excessive alloca splitting. Fixes rust-lang/rust#88580. Differential Revision: https://reviews.llvm.org/D159354
- Loading branch information
Showing
2 changed files
with
185 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 | ||
; RUN: opt -passes='loop-unroll,sroa' --sroa-max-alloca-slices=16 -S -o - < %s | FileCheck %s | ||
|
||
; (Very) Reduced from this Rust code: | ||
; | ||
; extern "C" { | ||
; fn _use(x: [[[Option::<usize>; 5]; 5]; 5]) -> bool; | ||
; } | ||
; fn main() { | ||
; let s = [[[Option::<usize>::None; 5]; 5]; 5]; | ||
; unsafe { | ||
; _use(s); | ||
; } | ||
; } | ||
|
||
; @huge_size builds a 5x5x5 array of { i64, i64 } in three stages: | ||
;   loop.1 stores 0 at the start of each element of %array.sub.2, | ||
;   loop.2 memcpys %array.sub.2 into each row of %array.sub.1, | ||
;   loop.3 memcpys %array.sub.1 into each plane of %array. | ||
; The RUN line fully unrolls the loops and then runs SROA with | ||
; --sroa-max-alloca-slices=16. The CHECK lines show that no alloca for | ||
; %array.sub.2 remains (its contents are written as individual i64 stores), | ||
; while %array and %array.sub.1 stay as allocas and the loop.3 memcpys are | ||
; left as memcpys. | ||
define void @huge_size() { | ||
; CHECK-LABEL: define void @huge_size() { | ||
; CHECK-NEXT: start: | ||
; CHECK-NEXT: [[ARRAY:%.*]] = alloca [5 x [5 x [5 x { i64, i64 }]]], align 8 | ||
; CHECK-NEXT: [[ARRAY_SUB_1:%.*]] = alloca [5 x [5 x { i64, i64 }]], align 8 | ||
; CHECK-NEXT: br label [[LOOP_1:%.*]] | ||
; CHECK: loop.1: | ||
; CHECK-NEXT: br label [[LOOP_2:%.*]] | ||
; CHECK: loop.2: | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_1]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 8 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_ARRAY_SUB_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 16 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_ARRAY_SUB_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 24 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_ARRAY_SUB_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 32 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_ARRAY_SUB_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 40 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_ARRAY_SUB_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 48 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_ARRAY_SUB_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 56 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_ARRAY_SUB_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 64 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_ARRAY_SUB_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_ARRAY_SUB_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_SUB_1]], i64 72 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_ARRAY_SUB_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[GEP_2_1:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 1 | ||
; CHECK-NEXT: store i64 0, ptr [[GEP_2_1]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 8 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 16 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 24 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 32 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 40 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 48 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 56 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 64 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_1]], i64 72 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_1_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[GEP_2_2:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 2 | ||
; CHECK-NEXT: store i64 0, ptr [[GEP_2_2]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 8 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_2_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 16 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_2_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 24 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_2_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 32 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_2_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 40 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_2_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 48 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_2_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 56 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_2_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 64 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_2_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_2]], i64 72 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_2_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[GEP_2_3:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 3 | ||
; CHECK-NEXT: store i64 0, ptr [[GEP_2_3]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 8 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_3_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 16 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_3_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 24 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_3_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 32 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_3_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 40 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_3_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 48 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_3_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 56 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_3_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 64 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_3_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_3]], i64 72 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_3_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[GEP_2_4:%.*]] = getelementptr [5 x [5 x { i64, i64 }]], ptr [[ARRAY_SUB_1]], i64 0, i64 4 | ||
; CHECK-NEXT: store i64 0, ptr [[GEP_2_4]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_6_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 8 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_6_0_GEP_2_4_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_69_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 16 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_69_0_GEP_2_4_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_7_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 24 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_7_0_GEP_2_4_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_718_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 32 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_718_0_GEP_2_4_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_8_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 40 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_8_0_GEP_2_4_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_827_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 48 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_827_0_GEP_2_4_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_9_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 56 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_9_0_GEP_2_4_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_936_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 64 | ||
; CHECK-NEXT: store i64 0, ptr [[ARRAY_SUB_2_SROA_936_0_GEP_2_4_SROA_IDX]], align 1 | ||
; CHECK-NEXT: [[ARRAY_SUB_2_SROA_10_0_GEP_2_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_2_4]], i64 72 | ||
; CHECK-NEXT: store i64 undef, ptr [[ARRAY_SUB_2_SROA_10_0_GEP_2_4_SROA_IDX]], align 1 | ||
; CHECK-NEXT: br label [[LOOP_3:%.*]] | ||
; CHECK: loop.3: | ||
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ARRAY]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false) | ||
; CHECK-NEXT: [[GEP_3_1:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 1 | ||
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_1]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false) | ||
; CHECK-NEXT: [[GEP_3_2:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 2 | ||
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_2]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false) | ||
; CHECK-NEXT: [[GEP_3_3:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 3 | ||
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_3]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false) | ||
; CHECK-NEXT: [[GEP_3_4:%.*]] = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr [[ARRAY]], i64 0, i64 4 | ||
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[GEP_3_4]], ptr [[ARRAY_SUB_1]], i64 1600, i1 false) | ||
; CHECK-NEXT: ret void | ||
; | ||
start: | ||
%array = alloca [5 x [5 x [5 x { i64, i64 }]]], align 8 | ||
%array.sub.1 = alloca [5 x [5 x { i64, i64 }]], align 8 | ||
%array.sub.2 = alloca [5 x { i64, i64 }], align 8 | ||
br label %loop.1 | ||
|
||
; Set up %array.sub.2 | ||
; Only the first i64 of each { i64, i64 } element is stored to; the second | ||
; field is never written, which is why the CHECK lines alternate between | ||
; `store i64 0` and `store i64 undef`. | ||
loop.1: | ||
%ind.1 = phi i64 [ 0, %start ], [ %ind.1.next, %loop.1 ] | ||
%gep.1 = getelementptr [5 x { i64, i64 }], ptr %array.sub.2, i64 0, i64 %ind.1 | ||
store i64 0, ptr %gep.1, align 8 | ||
%ind.1.next = add i64 %ind.1, 1 | ||
%loop1.cond = icmp ult i64 %ind.1.next, 5 | ||
br i1 %loop1.cond, label %loop.1, label %loop.2 | ||
|
||
; Set up %array.sub.1 | ||
; NOTE(review): the memcpy length below (160) is larger than %array.sub.2 as | ||
; declared ([5 x { i64, i64 }], i.e. 5 * 16 = 80 bytes); the CHECK lines only | ||
; show stores at offsets 0..72 per copy. Confirm the length is intentional | ||
; for this reduced test before changing it (CHECK lines are autogenerated). | ||
loop.2: | ||
%ind.2 = phi i64 [ 0, %loop.1 ], [ %ind.2.next, %loop.2 ] | ||
%gep.2 = getelementptr [5 x [5 x { i64, i64 }]], ptr %array.sub.1, i64 0, i64 %ind.2 | ||
call void @llvm.memcpy.p0.p0.i64(ptr %gep.2, ptr %array.sub.2, i64 160, i1 false) | ||
%ind.2.next = add i64 %ind.2, 1 | ||
%loop.2.cond = icmp ult i64 %ind.2.next, 5 | ||
br i1 %loop.2.cond, label %loop.2, label %loop.3 | ||
|
||
; Set up %array | ||
; NOTE(review): the memcpy length below (1600) is larger than %array.sub.1 as | ||
; declared ([5 x [5 x { i64, i64 }]], i.e. 25 * 16 = 400 bytes). Confirm the | ||
; length is intentional for this reduced test. | ||
loop.3: | ||
%ind.3 = phi i64 [ 0, %loop.2 ], [ %ind.3.next, %loop.3 ] | ||
%gep.3 = getelementptr [5 x [5 x [5 x { i64, i64 }]]], ptr %array, i64 0, i64 %ind.3 | ||
call void @llvm.memcpy.p0.p0.i64(ptr %gep.3, ptr %array.sub.1, i64 1600, i1 false) | ||
%ind.3.next = add i64 %ind.3, 1 | ||
%loop.3.cond = icmp ult i64 %ind.3.next, 5 | ||
br i1 %loop.3.cond, label %loop.3, label %exit | ||
|
||
exit: | ||
ret void | ||
} | ||
|
||
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) |
e13e808
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi, a heads up: this commit has caused increased stack or local memory usage for some CUDA kernels. We're trying to come up with an isolated test case.
e13e808
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
False alarm. It's just that the default value of `--sroa-max-alloca-slices` is now too small for some code. Increasing it locally to around 3000 restores the previous behavior.
e13e808
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm, if 1000 is too low for GPU targets, we might need a smarter heuristic here. (Something that takes into account whether we end up splitting a single memset/memcpy into a huge number of loads/stores or not.)