/
defer.rs
188 lines (166 loc) · 6.09 KB
/
defer.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
use std::{
cell::UnsafeCell,
mem::{ManuallyDrop, MaybeUninit},
};
/// Defers input usage and output drop during benchmarking.
///
/// To reduce memory usage, this only allocates storage for inputs if outputs do
/// not need deferred drop.
///
/// This is a `union`, so nothing at runtime records which field is active.
/// The active field is determined solely by `Self::ONLY_INPUTS`, and every
/// access must go through the field that matches that constant.
pub(crate) union DeferStore<I, O> {
/// The variant used if outputs need to be dropped.
///
/// Inputs are stored contiguously with outputs in memory. This improves
/// performance by:
/// - Removing the overhead of `zip` between two separate buffers.
/// - Improving cache locality and cache prefetching. Input is strategically
///   placed before output because iteration is from low to high addresses,
///   so doing this makes memory access patterns very predictable.
slots: ManuallyDrop<Vec<DeferSlot<I, O>>>,
/// The variant used if `Self::ONLY_INPUTS`, i.e. outputs do not need to be
/// dropped.
inputs: ManuallyDrop<Vec<DeferSlotItem<I>>>,
}
impl<I, O> Drop for DeferStore<I, O> {
    /// Drops the backing `Vec` of whichever union field is in use.
    ///
    /// The field is chosen by `ONLY_INPUTS`, mirroring how the store is
    /// constructed and accessed everywhere else.
    #[inline]
    fn drop(&mut self) {
        if Self::ONLY_INPUTS {
            // SAFETY: `ONLY_INPUTS` is `true`, so `inputs` is the field this
            // store uses. This is the destructor, so the field is not used
            // again after being dropped.
            unsafe { ManuallyDrop::drop(&mut self.inputs) }
        } else {
            // SAFETY: `ONLY_INPUTS` is `false`, so `slots` is the field this
            // store uses. This is the destructor, so the field is not used
            // again after being dropped.
            unsafe { ManuallyDrop::drop(&mut self.slots) }
        }
    }
}
impl<I, O> Default for DeferStore<I, O> {
    /// Creates an empty store whose active union field matches
    /// `ONLY_INPUTS`.
    ///
    /// The `Vec` starts empty; `Vec::new` does not allocate.
    #[inline]
    fn default() -> Self {
        // Constructing a union value is a safe operation, so no `unsafe`
        // block is needed here. Choosing the field from `ONLY_INPUTS` is what
        // the `unsafe` field accesses elsewhere in this type rely on.
        if Self::ONLY_INPUTS {
            Self { inputs: ManuallyDrop::new(Vec::new()) }
        } else {
            Self { slots: ManuallyDrop::new(Vec::new()) }
        }
    }
}
impl<I, O> DeferStore<I, O> {
    /// `true` when `O` has no drop glue, meaning only inputs need to be
    /// deferred.
    ///
    /// In that case outputs are never inserted into `DeferStore` and the
    /// `inputs` field is the one in use; otherwise `slots` is.
    const ONLY_INPUTS: bool = !std::mem::needs_drop::<O>();

    /// Readies the store for a sample of `sample_size` iterations.
    ///
    /// Any previous contents are discarded and the active `Vec` ends up with
    /// a length of exactly `sample_size`. The elements are left
    /// uninitialized for the caller to fill in.
    #[inline]
    pub fn prepare(&mut self, sample_size: usize) {
        /// Shared implementation regardless of the `Vec` item type: empties
        /// `buf`, then exposes exactly `len` uninitialized elements.
        ///
        /// # Safety
        ///
        /// `T` must consist solely of `MaybeUninit` data so that its values
        /// may be created from uninitialized memory.
        #[inline(always)]
        unsafe fn reset_uninit<T>(buf: &mut Vec<T>, len: usize) {
            buf.clear();
            buf.reserve_exact(len);
            // SAFETY: `buf` is empty and has capacity for at least `len`
            // elements after the reservation above. The caller guarantees
            // that `T` is valid when uninitialized.
            unsafe { buf.set_len(len) }
        }

        if Self::ONLY_INPUTS {
            // SAFETY: `inputs` is the field in use when `ONLY_INPUTS` is
            // `true`, and its items only contain `MaybeUninit` data.
            unsafe { reset_uninit(&mut *self.inputs, sample_size) }
        } else {
            // SAFETY: `slots` is the field in use when `ONLY_INPUTS` is
            // `false`, and its items only contain `MaybeUninit` fields.
            unsafe { reset_uninit(&mut *self.slots, sample_size) }
        }
    }

    /// Returns the sample's slots for iteration.
    ///
    /// The caller is expected to use the returned slice to initialize inputs
    /// for the sample loop.
    ///
    /// `Ok` carries full input/output slots. `Err` carries input-only slots
    /// and is returned when `O` does not need deferred drop. Ideally this
    /// distinction would live directly on `DeferSlot`, but there is no way to
    /// change its size based on `needs_drop::<O>()`.
    #[inline(always)]
    pub fn slots(&self) -> Result<&[DeferSlot<I, O>], &[DeferSlotItem<I>]> {
        if Self::ONLY_INPUTS {
            // SAFETY: `inputs` is the field in use when `ONLY_INPUTS` is
            // `true`.
            Err(unsafe { self.inputs.as_slice() })
        } else {
            // SAFETY: `slots` is the field in use when `ONLY_INPUTS` is
            // `false`.
            Ok(unsafe { self.slots.as_slice() })
        }
    }
}
/// Storage for a single iteration within a sample.
///
/// Input is stored before output to improve cache prefetching since iteration
/// progresses from low to high addresses.
///
/// # UnsafeCell
///
/// `UnsafeCell` is used to allow `output` to safely refer to `input`. Although
/// `output` itself is never aliased, it is also stored as `UnsafeCell` in order
/// to get mutable access through a shared `&DeferSlot`.
///
/// # Safety
///
/// All fields **must** be `MaybeUninit`. This allows us to safely set the
/// length of `Vec<DeferSlot>` within the allocated capacity.
// `repr(C)` keeps the fields in declaration order, so `input` really does sit
// before `output` in memory as described above.
#[repr(C)]
pub(crate) struct DeferSlot<I, O> {
/// The iteration's input; may be uninitialized.
pub input: DeferSlotItem<I>,
/// The iteration's output, which may borrow from `input`; may be
/// uninitialized.
pub output: DeferSlotItem<O>,
}
/// A possibly-uninitialized `T` behind an `UnsafeCell`, used as the storage
/// cell for each deferred input and output.
type DeferSlotItem<T> = UnsafeCell<MaybeUninit<T>>;
// Unit tests for the soundness assumptions `DeferSlot` relies on: that it is
// valid while uninitialized, and that `output` may alias `input`.
#[cfg(test)]
mod tests {
use super::*;
/// Tests that accessing an uninitialized `DeferSlot` is safe due to all of
/// its fields being `MaybeUninit`.
#[test]
fn access_uninit_slot() {
let mut slot: MaybeUninit<DeferSlot<String, String>> = MaybeUninit::uninit();
let slot_ref = unsafe { slot.assume_init_mut() };
// Plain assignment is fine here: the overwritten fields are `MaybeUninit`,
// which never drops its contents, so no uninitialized `String` is dropped.
slot_ref.input = UnsafeCell::new(MaybeUninit::new(String::new()));
slot_ref.output = UnsafeCell::new(MaybeUninit::new(String::new()));
unsafe {
let slot = slot.assume_init();
assert_eq!(slot.input.into_inner().assume_init(), "");
assert_eq!(slot.output.into_inner().assume_init(), "");
}
}
/// Tests that accessing `DeferSlot.input` through an aliased reference in
/// `DeferSlot.output` is safe due to `input` being an `UnsafeCell`.
#[test]
fn access_aliased_input() {
// An output that mutably borrows the input stored in the same slot and
// mutates it when dropped.
struct Output<'i> {
input: &'i mut String,
}
impl Drop for Output<'_> {
fn drop(&mut self) {
assert_eq!(self.input, "hello");
self.input.push_str(" world");
}
}
let slot: MaybeUninit<DeferSlot<String, Output>> = MaybeUninit::uninit();
let slot_ref = unsafe { slot.assume_init_ref() };
// Loop to ensure previous iterations don't affect later uses of the
// same entry slot.
for _ in 0..5 {
unsafe {
let input_ptr = slot_ref.input.get().cast::<String>();
let output_ptr = slot_ref.output.get().cast::<Output>();
// Initialize input and output.
input_ptr.write("hello".to_owned());
output_ptr.write(Output { input: &mut *input_ptr });
// Use and discard output.
assert_eq!((*output_ptr).input, "hello");
output_ptr.drop_in_place();
// `Output::drop` appended to the input through its aliased reference.
assert_eq!(&*input_ptr, "hello world");
// Discard input.
input_ptr.drop_in_place();
}
}
}
}