Skip to content

Commit

Permalink
Use undef for partially-uninit constants up to 1024 bytes
Browse files Browse the repository at this point in the history
There needs to be some limit to avoid perf regressions on large arrays
with undef in each element (see comment in the code).
  • Loading branch information
erikdesjardins committed Feb 18, 2022
1 parent b8c56fa commit b7e5597
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 43 deletions.
6 changes: 4 additions & 2 deletions compiler/rustc_codegen_llvm/src/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,10 @@ pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) ->
}
};

// Generating partially-uninit consts inhibits optimizations, so it is disabled by default.
// See https://github.com/rust-lang/rust/issues/84565.
// Generating partially-uninit consts is limited to small allocations,
// to avoid the cost of generating large complex const expressions.
// For example, `[(u32, u8); 1024 * 1024]` contains uninit padding in each element,
// and would result in `{ [5 x i8] zeroinitializer, [3 x i8] undef, ...repeat 1M times... }`.
let allow_partially_uninit =
match cx.sess().opts.debugging_opts.partially_uninit_const_threshold {
Some(max) => range.len() <= max,
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_session/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1341,9 +1341,9 @@ options! {
"panic strategy for panics in drops"),
parse_only: bool = (false, parse_bool, [UNTRACKED],
"parse only; do not compile, assemble, or link (default: no)"),
partially_uninit_const_threshold: Option<usize> = (None, parse_opt_number, [TRACKED],
partially_uninit_const_threshold: Option<usize> = (Some(1024), parse_opt_number, [TRACKED],
"allow generating const initializers with mixed init/uninit bytes, \
and set the maximum total size of a const allocation for which this is allowed (default: never)"),
and set the maximum total size of a const allocation for which this is allowed (default: 1024 bytes)"),
perf_stats: bool = (false, parse_bool, [UNTRACKED],
"print some performance-related statistics (default: no)"),
pick_stable_methods_before_any_unstable: bool = (true, parse_bool, [TRACKED],
Expand Down
4 changes: 2 additions & 2 deletions src/test/codegen/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ pub fn inline_enum_const() -> E<i8, i16> {
// Codegen test: returns `E::A(0)` typed as `E<i16, [i16; 3]>` and verifies, via the
// FileCheck directives in the body, that the value is copied with an 8-byte,
// 2-byte-aligned memcpy out of the global captured as [[LOW_HIGH]].
#[no_mangle]
pub fn low_align_const() -> E<i16, [i16; 3]> {
// Check that low_align_const and high_align_const use the same constant
// NOTE(review): the two directives below differ only in the LLVM type of the constant
// (`<{ [8 x i8] }>` vs. `<{ [4 x i8], [4 x i8] }>`). In this commit view they appear to be
// the removed and the added version of the same line, so the real file is expected to
// contain only one of them -- confirm against the repository.
// CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 2 %1, i8* align 2 getelementptr inbounds (<{ [8 x i8] }>, <{ [8 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false)
// CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 2 %1, i8* align 2 getelementptr inbounds (<{ [4 x i8], [4 x i8] }>, <{ [4 x i8], [4 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false)
// Borrow-then-dereference of the constant expression; presumably this is what makes the
// value be emitted as a memory constant that is then memcpy'd -- TODO confirm.
*&E::A(0)
}

// CHECK-LABEL: @high_align_const
// Codegen test: returns `E::A(0)` typed as `E<i16, i32>` and verifies, via the
// FileCheck directives in the body, that the value is copied with an 8-byte,
// 4-byte-aligned memcpy out of the same [[LOW_HIGH]] global used by low_align_const.
#[no_mangle]
pub fn high_align_const() -> E<i16, i32> {
// Check that low_align_const and high_align_const use the same constant
// NOTE(review): the two directives below differ only in the LLVM type of the constant
// (`<{ [8 x i8] }>` vs. `<{ [4 x i8], [4 x i8] }>`). In this commit view they appear to be
// the removed and the added version of the same line, so the real file is expected to
// contain only one of them -- confirm against the repository.
// CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [8 x i8] }>, <{ [8 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false)
// CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [4 x i8], [4 x i8] }>, <{ [4 x i8], [4 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false)
// Borrow-then-dereference of the constant expression; presumably this is what makes the
// value be emitted as a memory constant that is then memcpy'd -- TODO confirm.
*&E::A(0)
}
35 changes: 0 additions & 35 deletions src/test/codegen/uninit-consts-allow-partially-uninit.rs

This file was deleted.

17 changes: 15 additions & 2 deletions src/test/codegen/uninit-consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@ pub struct PartiallyUninit {
}

// CHECK: [[FULLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [10 x i8] }> undef
// CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [16 x i8] }> <{ [16 x i8] c"\EF\BE\AD\DE\00\00\00\00\00\00\00\00\00\00\00\00" }>, align 4

// CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"\EF\BE\AD\DE", [12 x i8] undef }>, align 4

// This shouldn't contain undef, since it's larger than the 1024-byte limit.
// CHECK: [[UNINIT_PADDING_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4

// CHECK: [[FULLY_UNINIT_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [16384 x i8] }> undef

// CHECK-LABEL: @fully_uninit
Expand All @@ -27,7 +32,15 @@ pub const fn fully_uninit() -> MaybeUninit<[u8; 10]> {
// Codegen test: returns a `PartiallyUninit` constant whose `x` field is initialized and
// whose `y` field is `MaybeUninit::uninit()`, and verifies that it is copied with a
// 16-byte, 4-byte-aligned memcpy out of the global captured as [[PARTIALLY_UNINIT]].
#[no_mangle]
pub const fn partially_uninit() -> PartiallyUninit {
// `x` is given a recognizable bit pattern; `y` is deliberately left uninitialized so the
// constant has mixed init/uninit bytes.
const X: PartiallyUninit = PartiallyUninit { x: 0xdeadbeef, y: MaybeUninit::uninit() };
// NOTE(review): the two directives below differ only in the LLVM type of the constant
// (`<{ [16 x i8] }>` vs. `<{ [4 x i8], [12 x i8] }>`). In this commit view they appear to
// be the removed and the added version of the same line, so the real file is expected to
// contain only one of them -- confirm against the repository.
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [16 x i8] }>, <{ [16 x i8] }>* [[PARTIALLY_UNINIT]], i32 0, i32 0, i32 0), i{{(32|64)}} 16, i1 false)
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [4 x i8], [12 x i8] }>, <{ [4 x i8], [12 x i8] }>* [[PARTIALLY_UNINIT]], i32 0, i32 0, i32 0), i{{(32|64)}} 16, i1 false)
X
}

// CHECK-LABEL: @uninit_padding_huge
// Codegen test: returns a 4096-element array of `(u32, u8)` tuples and verifies that it is
// copied with a 32768-byte, 4-byte-aligned memcpy out of the global captured as
// [[UNINIT_PADDING_HUGE]], whose expected type is a single `[32768 x i8]` byte array.
#[no_mangle]
pub const fn uninit_padding_huge() -> [(u32, u8); 4096] {
// Every element is the same `(123, 45)` pair. The directive below expects 32768 bytes for
// the 4096 elements, i.e. 8 bytes per element; presumably the bytes beyond the two fields
// are padding -- TODO confirm.
const X: [(u32, u8); 4096] = [(123, 45); 4096];
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [32768 x i8] }>, <{ [32768 x i8] }>* [[UNINIT_PADDING_HUGE]], i32 0, i32 0, i32 0), i{{(32|64)}} 32768, i1 false)
X
}

Expand Down

0 comments on commit b7e5597

Please sign in to comment.