Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize copying large ranges of undefmask blocks #58556

Merged
merged 8 commits into from Mar 15, 2019
76 changes: 63 additions & 13 deletions src/librustc/mir/interpret/allocation.rs
Expand Up @@ -100,8 +100,7 @@ impl AllocationExtra<(), ()> for () {
impl<Tag, Extra> Allocation<Tag, Extra> {
/// Creates a read-only allocation initialized by the given bytes
pub fn from_bytes(slice: &[u8], align: Align, extra: Extra) -> Self {
let mut undef_mask = UndefMask::new(Size::ZERO);
undef_mask.grow(Size::from_bytes(slice.len() as u64), true);
let undef_mask = UndefMask::new(Size::from_bytes(slice.len() as u64), true);
Self {
bytes: slice.to_owned(),
relocations: Relocations::new(),
Expand All @@ -121,7 +120,7 @@ impl<Tag, Extra> Allocation<Tag, Extra> {
Allocation {
bytes: vec![0; size.bytes() as usize],
relocations: Relocations::new(),
undef_mask: UndefMask::new(size),
undef_mask: UndefMask::new(size, false),
align,
mutability: Mutability::Mutable,
extra,
Expand Down Expand Up @@ -613,8 +612,9 @@ impl<Tag> DerefMut for Relocations<Tag> {
////////////////////////////////////////////////////////////////////////////////

type Block = u64;
const BLOCK_SIZE: u64 = 64;

/// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte
/// is defined. If it is `false` the byte is undefined.
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
pub struct UndefMask {
blocks: Vec<Block>,
Expand All @@ -624,12 +624,14 @@ pub struct UndefMask {
impl_stable_hash_for!(struct mir::interpret::UndefMask{blocks, len});

impl UndefMask {
pub fn new(size: Size) -> Self {
pub const BLOCK_SIZE: u64 = 64;

pub fn new(size: Size, state: bool) -> Self {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I infer that state here is interpreted as { true => defined, false => undefined }, (right?)

You might consider adding a comment above the header saying so. (My initial interpretation of "undef mask" was that if the bit is true, then it is undefined)

let mut m = UndefMask {
blocks: vec![],
len: Size::ZERO,
};
m.grow(size, false);
m.grow(size, state);
m
}

Expand All @@ -643,6 +645,7 @@ impl UndefMask {
return Err(self.len);
}

// FIXME(oli-obk): optimize this for allocations larger than a block.
let idx = (start.bytes()..end.bytes())
.map(|i| Size::from_bytes(i))
.find(|&i| !self.get(i));
Expand All @@ -662,20 +665,63 @@ impl UndefMask {
}

pub fn set_range_inbounds(&mut self, start: Size, end: Size, new_state: bool) {
for i in start.bytes()..end.bytes() {
self.set(Size::from_bytes(i), new_state);
let (blocka, bita) = bit_index(start);
let (blockb, bitb) = bit_index(end);
if blocka == blockb {
// first set all bits but the first `bita`
// then unset the last `64 - bitb` bits
let range = if bitb == 0 {
u64::max_value() << bita
} else {
(u64::max_value() << bita) & (u64::max_value() >> (64 - bitb))
};
if new_state {
self.blocks[blocka] |= range;
} else {
self.blocks[blocka] &= !range;
}
return;
}
// across block boundaries
if new_state {
// set bita..64 to 1
self.blocks[blocka] |= u64::max_value() << bita;
Copy link
Member

@pnkfelix pnkfelix Mar 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(aside: i'm sort of amazed libstd doesn't have named methods for these operations; I would think turning big ranges of bits on or off within a uN (or better still, arbitrary arrays or vectors of [uN]), would be so common as to have higher-level methods than shifts and bitwise-or-masking.)

// set 0..bitb to 1
if bitb != 0 {
self.blocks[blockb] |= u64::max_value() >> (64 - bitb);
}
// fill in all the other blocks (much faster than one bit at a time)
for block in (blocka + 1) .. blockb {
self.blocks[block] = u64::max_value();
}
} else {
// set bita..64 to 0
self.blocks[blocka] &= !(u64::max_value() << bita);
// set 0..bitb to 0
if bitb != 0 {
self.blocks[blockb] &= !(u64::max_value() >> (64 - bitb));
}
// fill in all the other blocks (much faster than one bit at a time)
for block in (blocka + 1) .. blockb {
self.blocks[block] = 0;
}
oli-obk marked this conversation as resolved.
Show resolved Hide resolved
}
}

#[inline]
pub fn get(&self, i: Size) -> bool {
let (block, bit) = bit_index(i);
(self.blocks[block] & 1 << bit) != 0
(self.blocks[block] & (1 << bit)) != 0
}

#[inline]
pub fn set(&mut self, i: Size, new_state: bool) {
let (block, bit) = bit_index(i);
self.set_bit(block, bit, new_state);
}

#[inline]
fn set_bit(&mut self, block: usize, bit: usize, new_state: bool) {
if new_state {
self.blocks[block] |= 1 << bit;
} else {
Expand All @@ -684,11 +730,15 @@ impl UndefMask {
}

pub fn grow(&mut self, amount: Size, new_state: bool) {
let unused_trailing_bits = self.blocks.len() as u64 * BLOCK_SIZE - self.len.bytes();
if amount.bytes() == 0 {
return;
}
let unused_trailing_bits = self.blocks.len() as u64 * Self::BLOCK_SIZE - self.len.bytes();
if amount.bytes() > unused_trailing_bits {
let additional_blocks = amount.bytes() / BLOCK_SIZE + 1;
let additional_blocks = amount.bytes() / Self::BLOCK_SIZE + 1;
assert_eq!(additional_blocks as usize as u64, additional_blocks);
self.blocks.extend(
// FIXME(oli-obk): optimize this by repeating `new_state as Block`
iter::repeat(0).take(additional_blocks as usize),
);
}
Expand All @@ -701,8 +751,8 @@ impl UndefMask {
#[inline]
fn bit_index(bits: Size) -> (usize, usize) {
let bits = bits.bytes();
let a = bits / BLOCK_SIZE;
let b = bits % BLOCK_SIZE;
let a = bits / UndefMask::BLOCK_SIZE;
let b = bits % UndefMask::BLOCK_SIZE;
assert_eq!(a as usize as u64, a);
assert_eq!(b as usize as u64, b);
(a as usize, b as usize)
Expand Down
102 changes: 76 additions & 26 deletions src/librustc_mir/interpret/memory.rs
Expand Up @@ -700,24 +700,29 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
// relocations overlapping the edges; those would not be handled correctly).
let relocations = {
let relocations = self.get(src.alloc_id)?.relocations(self, src, size);
let mut new_relocations = Vec::with_capacity(relocations.len() * (length as usize));
for i in 0..length {
new_relocations.extend(
relocations
.iter()
.map(|&(offset, reloc)| {
// compute offset for current repetition
let dest_offset = dest.offset + (i * size);
(
// shift offsets from source allocation to destination allocation
offset + dest_offset - src.offset,
reloc,
)
})
);
}
if relocations.is_empty() {
// nothing to copy, ignore even the `length` loop
Vec::new()
} else {
let mut new_relocations = Vec::with_capacity(relocations.len() * (length as usize));
for i in 0..length {
new_relocations.extend(
relocations
.iter()
.map(|&(offset, reloc)| {
// compute offset for current repetition
let dest_offset = dest.offset + (i * size);
(
// shift offsets from source allocation to destination allocation
offset + dest_offset - src.offset,
reloc,
)
})
);
}

new_relocations
new_relocations
}
};

let tcx = self.tcx.tcx;
Expand Down Expand Up @@ -784,20 +789,65 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
// The bits have to be saved locally before writing to dest in case src and dest overlap.
assert_eq!(size.bytes() as usize as u64, size.bytes());

let undef_mask = self.get(src.alloc_id)?.undef_mask.clone();
let dest_allocation = self.get_mut(dest.alloc_id)?;
let undef_mask = &self.get(src.alloc_id)?.undef_mask;

// Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`),
// a naive undef mask copying algorithm would repeatedly have to read the undef mask from
// the source and write it to the destination. Even if we optimized the memory accesses,
// we'd be doing all of this `repeat` times.
// Therefor we precompute a compressed version of the undef mask of the source value and
// then write it back `repeat` times without computing any more information from the source.

// a precomputed cache for ranges of defined/undefined bits
// 0000010010001110 will become
// [5, 1, 2, 1, 3, 3, 1]
// where each element toggles the state
let mut ranges = smallvec::SmallVec::<[u64; 1]>::new();
let first = undef_mask.get(src.offset);
let mut cur_len = 1;
let mut cur = first;
for i in 1..size.bytes() {
// FIXME: optimize to bitshift the current undef block's bits and read the top bit
if undef_mask.get(src.offset + Size::from_bytes(i)) == cur {
cur_len += 1;
} else {
ranges.push(cur_len);
cur_len = 1;
cur = !cur;
}
}

for i in 0..size.bytes() {
let defined = undef_mask.get(src.offset + Size::from_bytes(i));
// now fill in all the data
let dest_allocation = self.get_mut(dest.alloc_id)?;
// an optimization where we can just overwrite an entire range of definedness bits if
// they are going to be uniformly `1` or `0`.
if ranges.is_empty() {
dest_allocation.undef_mask.set_range_inbounds(
dest.offset,
dest.offset + size * repeat,
first,
);
return Ok(())
}

for j in 0..repeat {
dest_allocation.undef_mask.set(
dest.offset + Size::from_bytes(i + (size.bytes() * j)),
defined
// remember to fill in the trailing bits
ranges.push(cur_len);

for mut j in 0..repeat {
j *= size.bytes();
j += dest.offset.bytes();
let mut cur = first;
for range in &ranges {
let old_j = j;
j += range;
dest_allocation.undef_mask.set_range_inbounds(
Size::from_bytes(old_j),
Size::from_bytes(j),
cur,
);
cur = !cur;
}
}

Ok(())
}
}
26 changes: 26 additions & 0 deletions src/test/run-pass-fulldeps/undef_mask.rs
@@ -0,0 +1,26 @@
// ignore-cross-compile
// ignore-stage1

#![feature(rustc_private)]

extern crate rustc;

use rustc::mir::interpret::UndefMask;
use rustc::ty::layout::Size;

fn main() {
let mut mask = UndefMask::new(Size::from_bytes(500), false);
assert!(!mask.get(Size::from_bytes(499)));
mask.set(Size::from_bytes(499), true);
assert!(mask.get(Size::from_bytes(499)));
mask.set_range_inbounds(Size::from_bytes(100), Size::from_bytes(256), true);
for i in 0..100 {
assert!(!mask.get(Size::from_bytes(i)));
}
for i in 100..256 {
assert!(mask.get(Size::from_bytes(i)));
}
for i in 256..499 {
assert!(!mask.get(Size::from_bytes(i)));
}
}