Skip to content

Commit

Permalink
vec: use offset_inbounds for iterators
Browse files Browse the repository at this point in the history
This allows LLVM to optimize vector iterators to an `getelementptr` and
`icmp` pair, instead of `getelementptr` and *two* comparisons.

Code snippet:

~~~
fn foo(xs: &mut [f64]) {
    for x in xs.mut_iter() {
        *x += 10.0;
    }
}
~~~

LLVM IR at stage0:

~~~
; Function Attrs: noinline uwtable
define void @"_ZN3foo17_68e1b25bca131dba7_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* nocapture, { double*, i64 }* nocapture) #1 {
"function top level":
  %2 = getelementptr inbounds { double*, i64 }* %1, i64 0, i32 0
  %3 = load double** %2, align 8
  %4 = getelementptr inbounds { double*, i64 }* %1, i64 0, i32 1
  %5 = load i64* %4, align 8
  %6 = ptrtoint double* %3 to i64
  %7 = and i64 %5, -8
  %8 = add i64 %7, %6
  %9 = inttoptr i64 %8 to double*
  %10 = icmp eq double* %3, %9
  %11 = icmp eq double* %3, null
  %or.cond6 = or i1 %10, %11
  br i1 %or.cond6, label %match_case, label %match_else

match_else:                                       ; preds = %"function top level", %match_else
  %12 = phi double* [ %13, %match_else ], [ %3, %"function top level" ]
  %13 = getelementptr double* %12, i64 1
  %14 = load double* %12, align 8
  %15 = fadd double %14, 1.000000e+01
  store double %15, double* %12, align 8
  %16 = icmp eq double* %13, %9
  %17 = icmp eq double* %13, null
  %or.cond = or i1 %16, %17
  br i1 %or.cond, label %match_case, label %match_else

match_case:                                       ; preds = %match_else, %"function top level"
  ret void
}
~~~

Optimized LLVM IR at stage1/stage2:

~~~
; Function Attrs: noinline uwtable
define void @"_ZN3foo17_68e1b25bca131dba7_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* nocapture, { double*, i64 }* nocapture) #1 {
"function top level":
  %2 = getelementptr inbounds { double*, i64 }* %1, i64 0, i32 0
  %3 = load double** %2, align 8
  %4 = getelementptr inbounds { double*, i64 }* %1, i64 0, i32 1
  %5 = load i64* %4, align 8
  %6 = lshr i64 %5, 3
  %7 = getelementptr inbounds double* %3, i64 %6
  %8 = icmp eq i64 %6, 0
  %9 = icmp eq double* %3, null
  %or.cond6 = or i1 %8, %9
  br i1 %or.cond6, label %match_case, label %match_else

match_else:                                       ; preds = %"function top level", %match_else
  %.sroa.0.0.in7 = phi double* [ %10, %match_else ], [ %3, %"function top level" ]
  %10 = getelementptr inbounds double* %.sroa.0.0.in7, i64 1
  %11 = load double* %.sroa.0.0.in7, align 8
  %12 = fadd double %11, 1.000000e+01
  store double %12, double* %.sroa.0.0.in7, align 8
  %13 = icmp eq double* %10, %7
  br i1 %13, label %match_case, label %match_else

match_case:                                       ; preds = %match_else, %"function top level"
  ret void
}
~~~
  • Loading branch information
thestinger committed Aug 7, 2013
1 parent 7d115c9 commit 55f3d04
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
21 changes: 20 additions & 1 deletion src/libstd/ptr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,6 @@ pub trait RawPtr<T> {
fn is_not_null(&self) -> bool;
unsafe fn to_option(&self) -> Option<&T>;
fn offset(&self, count: int) -> Self;
#[cfg(not(stage0))]
unsafe fn offset_inbounds(self, count: int) -> Self;
}

Expand Down Expand Up @@ -307,6 +306,14 @@ impl<T> RawPtr<T> for *T {
#[inline]
fn offset(&self, count: int) -> *T { offset(*self, count) }

/// Calculates the offset from a pointer. The offset *must* be in-bounds of
/// the object, or one-byte-past-the-end.
#[inline]
#[cfg(stage0)]
unsafe fn offset_inbounds(self, count: int) -> *T {
intrinsics::offset(self, count)
}

/// Calculates the offset from a pointer. The offset *must* be in-bounds of
/// the object, or one-byte-past-the-end.
#[inline]
Expand Down Expand Up @@ -347,6 +354,18 @@ impl<T> RawPtr<T> for *mut T {
#[inline]
fn offset(&self, count: int) -> *mut T { mut_offset(*self, count) }

/// Calculates the offset from a pointer. The offset *must* be in-bounds of
/// the object, or one-byte-past-the-end. An arithmetic overflow is also
/// undefined behaviour.
///
/// This method should be preferred over `offset` when the guarantee can be
/// satisfied, to enable better optimization.
#[inline]
#[cfg(stage0)]
unsafe fn offset_inbounds(self, count: int) -> *mut T {
intrinsics::offset(self as *T, count) as *mut T
}

/// Calculates the offset from a pointer. The offset *must* be in-bounds of
/// the object, or one-byte-past-the-end. An arithmetic overflow is also
/// undefined behaviour.
Expand Down
8 changes: 4 additions & 4 deletions src/libstd/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ impl<'self,T> ImmutableVector<'self, T> for &'self [T] {
lifetime: cast::transmute(p)}
} else {
VecIterator{ptr: p,
end: p.offset(self.len() as int),
end: p.offset_inbounds(self.len() as int),
lifetime: cast::transmute(p)}
}
}
Expand Down Expand Up @@ -1837,7 +1837,7 @@ impl<'self,T> MutableVector<'self, T> for &'self mut [T] {
lifetime: cast::transmute(p)}
} else {
VecMutIterator{ptr: p,
end: p.offset(self.len() as int),
end: p.offset_inbounds(self.len() as int),
lifetime: cast::transmute(p)}
}
}
Expand Down Expand Up @@ -2193,7 +2193,7 @@ macro_rules! iterator {
// same pointer.
cast::transmute(self.ptr as uint + 1)
} else {
self.ptr.offset(1)
self.ptr.offset_inbounds(1)
};

Some(cast::transmute(old))
Expand Down Expand Up @@ -2225,7 +2225,7 @@ macro_rules! double_ended_iterator {
// See above for why 'ptr.offset' isn't used
cast::transmute(self.end as uint - 1)
} else {
self.end.offset(-1)
self.end.offset_inbounds(-1)
};
Some(cast::transmute(self.end))
}
Expand Down

5 comments on commit 55f3d04

@bors
Copy link
Contributor

@bors bors commented on 55f3d04 Aug 7, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bors
Copy link
Contributor

@bors bors commented on 55f3d04 Aug 7, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

merging thestinger/rust/iterator = 55f3d04 into auto

@bors
Copy link
Contributor

@bors bors commented on 55f3d04 Aug 7, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thestinger/rust/iterator = 55f3d04 merged ok, testing candidate = cdba212

@bors
Copy link
Contributor

@bors bors commented on 55f3d04 Aug 7, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fast-forwarding master to auto = cdba212

Please sign in to comment.