Skip to content

Commit

Permalink
make slice infallible
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Apr 7, 2021
1 parent 3e0967f commit a5e1942
Show file tree
Hide file tree
Showing 17 changed files with 78 additions and 92 deletions.
4 changes: 2 additions & 2 deletions polars/polars-core/src/chunked_array/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,7 @@ mod test {
// This failed with arrow comparisons.
// sliced
let a1: Int32Chunked = (&[Some(1), Some(2)]).iter().copied().collect();
let a1 = a1.slice(1, 1).unwrap();
let a1 = a1.slice(1, 1);
let a2: Int32Chunked = (&[Some(2)]).iter().copied().collect();
assert_eq!(a1.eq(&a2).sum(), a2.eq(&a1).sum());
assert_eq!(a1.neq(&a2).sum(), a2.neq(&a1).sum());
Expand All @@ -942,7 +942,7 @@ mod test {
assert_eq!(a1.gt_eq(&a2).sum(), a2.gt_eq(&a1).sum());

let a1: Utf8Chunked = (&["a", "b"]).iter().copied().collect();
let a1 = a1.slice(1, 1).unwrap();
let a1 = a1.slice(1, 1);
let a2: Utf8Chunked = (&["b"]).iter().copied().collect();
assert_eq!(a1.eq(&a2).sum(), a2.eq(&a1).sum());
assert_eq!(a1.neq(&a2).sum(), a2.neq(&a1).sum());
Expand Down
67 changes: 37 additions & 30 deletions polars/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ impl<T> ChunkedArray<T> {
}

/// Take a view of top n elements
pub fn limit(&self, num_elements: usize) -> Result<Self> {
pub fn limit(&self, num_elements: usize) -> Self {
self.slice(0, num_elements)
}

Expand Down Expand Up @@ -309,24 +309,31 @@ impl<T> ChunkedArray<T> {

/// Slice the array. The chunks are reallocated, but the underlying data slices are zero copy.
///
/// When offset is negative it will be counted from the end of the array
pub fn slice(&self, offset: i64, length: usize) -> Result<Self> {
/// When offset is negative it will be counted from the end of the array.
/// This method never errors: when offset or length is out of bounds,
/// the slice is clamped to the valid range of the array.
pub fn slice(&self, offset: i64, length: usize) -> Self {
let abs_offset = offset.abs() as usize;

if (offset >= 0 && abs_offset + length > self.len())
|| (offset < 0 && (abs_offset > self.len() || abs_offset < length))
{
return Err(PolarsError::OutOfBounds("offset and length was larger than the size of the ChunkedArray during slice operation".into()));
}

let len = self.len();
// The offset counted from the start of the array
let raw_offset = if offset < 0 {
self.len() - abs_offset
// negative index
let (raw_offset, slice_len) = if offset < 0 {
if abs_offset <= len {
(len - abs_offset, std::cmp::min(length, abs_offset))
// negative index larger than the array: slice from start
} else {
(0, std::cmp::min(length, len))
}
// positive index
} else if abs_offset <= len {
(abs_offset, std::cmp::min(length, len - abs_offset))
// empty slice
} else {
abs_offset
(len, 0)
};

let mut remaining_length = length;
let mut remaining_length = slice_len;
let mut remaining_offset = raw_offset;
let mut new_chunks = vec![];

Expand All @@ -350,7 +357,7 @@ impl<T> ChunkedArray<T> {
break;
}
}
Ok(self.copy_with_chunks(new_chunks))
self.copy_with_chunks(new_chunks)
}

/// Get a mask of the null values.
Expand Down Expand Up @@ -407,11 +414,10 @@ impl<T> ChunkedArray<T> {

/// Get the head of the ChunkedArray
pub fn head(&self, length: Option<usize>) -> Self {
let res_ca = match length {
match length {
Some(len) => self.slice(0, std::cmp::min(len, self.len())),
None => self.slice(0, std::cmp::min(10, self.len())),
};
res_ca.unwrap()
}
}

/// Get the tail of the ChunkedArray
Expand All @@ -420,7 +426,7 @@ impl<T> ChunkedArray<T> {
Some(len) => std::cmp::min(len, self.len()),
None => std::cmp::min(10, self.len()),
};
self.slice(-(len as i64), len).unwrap()
self.slice(-(len as i64), len)
}

/// Append in place.
Expand Down Expand Up @@ -978,7 +984,7 @@ pub(crate) mod test {
#[test]
fn limit() {
let a = get_chunked_array();
let b = a.limit(2).unwrap();
let b = a.limit(2);
println!("{:?}", b);
assert_eq!(b.len(), 2)
}
Expand Down Expand Up @@ -1043,17 +1049,18 @@ pub(crate) mod test {
let mut first = UInt32Chunked::new_from_slice("first", &[0, 1, 2]);
let second = UInt32Chunked::new_from_slice("second", &[3, 4, 5]);
first.append(&second);
assert_slice_equal(&first.slice(0, 3).unwrap(), &[0, 1, 2]);
assert_slice_equal(&first.slice(0, 4).unwrap(), &[0, 1, 2, 3]);
assert_slice_equal(&first.slice(1, 4).unwrap(), &[1, 2, 3, 4]);
assert_slice_equal(&first.slice(3, 2).unwrap(), &[3, 4]);
assert_slice_equal(&first.slice(3, 3).unwrap(), &[3, 4, 5]);
assert_slice_equal(&first.slice(-3, 3).unwrap(), &[3, 4, 5]);
assert_slice_equal(&first.slice(-6, 6).unwrap(), &[0, 1, 2, 3, 4, 5]);

assert!(first.slice(-7, 2).is_err());
assert!(first.slice(-3, 4).is_err());
assert!(first.slice(3, 4).is_err());
assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
assert_slice_equal(&first.slice(3, 2), &[3, 4]);
assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);

assert_eq!(first.slice(-7, 2).len(), 2);
assert_eq!(first.slice(-3, 4).len(), 3);
assert_eq!(first.slice(3, 4).len(), 3);
assert_eq!(first.slice(10, 4).len(), 0);
}

#[test]
Expand Down
1 change: 0 additions & 1 deletion polars/polars-core/src/chunked_array/ops/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ macro_rules! impl_quantile {
((($self.len() - null_count) as f64) * $quantile + null_count as f64) as i64,
1,
)
.unwrap()
.into_iter()
.next()
.unwrap();
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/chunkops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ mod test {
let s = Series::new("", &["foo", "bar", "spam"]);
let mut a = s.cast::<CategoricalType>().unwrap();

a.append(&a.slice(0, 2).unwrap()).unwrap();
a.append(&a.slice(0, 2)).unwrap();
a.rechunk();
assert!(a.categorical().unwrap().categorical_map.is_some());
}
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/chunked_array/ops/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ impl ChunkFilter<Utf8Type> for Utf8Chunked {
if filter.len() == 1 {
return match filter.get(0) {
Some(true) => Ok(self.clone()),
_ => self.slice(0, 0),
_ => Ok(self.slice(0, 0)),
};
}
check_filter_len!(self, filter);
Expand Down Expand Up @@ -111,7 +111,7 @@ impl ChunkFilter<ListType> for ListChunked {
if filter.len() == 1 {
return match filter.get(0) {
Some(true) => Ok(self.clone()),
_ => self.slice(0, 0),
_ => Ok(self.slice(0, 0)),
};
}
let (left, filter) = align_chunks_binary(self, filter);
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/shift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ macro_rules! impl_shift_fill {
let periods = clamp($periods, -($self.len() as i64), $self.len() as i64);
let slice_offset = (-periods).max(0) as i64;
let length = $self.len() - abs(periods) as usize;
let mut slice = $self.slice(slice_offset, length).unwrap();
let mut slice = $self.slice(slice_offset, length);

let fill_length = abs(periods) as usize;
let mut fill = match $fill_value {
Expand Down
14 changes: 7 additions & 7 deletions polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1047,13 +1047,13 @@ impl DataFrame {
}

/// Slice the DataFrame along the rows.
pub fn slice(&self, offset: i64, length: usize) -> Result<Self> {
pub fn slice(&self, offset: i64, length: usize) -> Self {
let col = self
.columns
.par_iter()
.iter()
.map(|s| s.slice(offset, length))
.collect::<Result<Vec<_>>>()?;
Ok(DataFrame::new_no_checks(col))
.collect::<Vec<_>>();
DataFrame::new_no_checks(col)
}

/// Get the head of the DataFrame
Expand Down Expand Up @@ -1504,7 +1504,7 @@ impl<'a> Iterator for RecordBatchIter<'a> {
let mut rb_cols = Vec::with_capacity(self.columns.len());
// take a slice from all columns and add them to the current RecordBatch
self.columns.iter().for_each(|s| {
let slice = s.slice(self.idx as i64, length).unwrap();
let slice = s.slice(self.idx as i64, length);
rb_cols.push(Arc::clone(&slice.chunks()[0]))
});
let rb = RecordBatch::try_new(Arc::clone(&self.schema), rb_cols).unwrap();
Expand Down Expand Up @@ -1699,7 +1699,7 @@ mod test {
#[test]
fn slice() {
let df = create_frame();
let sliced_df = df.slice(0, 2).expect("slice");
let sliced_df = df.slice(0, 2);
assert_eq!(sliced_df.shape(), (2, 2));
println!("{:?}", df)
}
Expand Down Expand Up @@ -1774,7 +1774,7 @@ mod test {
}
.unwrap();

df.vstack_mut(&df.slice(0, 3).unwrap()).unwrap();
df.vstack_mut(&df.slice(0, 3)).unwrap();
assert_eq!(df.n_chunks().unwrap(), 2)
}

Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/series/implementations/dates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,8 +351,8 @@ macro_rules! impl_dyn_series {
self.0.append_array(other)
}

fn slice(&self, offset: i64, length: usize) -> Result<Series> {
return self.0.slice(offset, length).map(|ca| ca.into_series());
fn slice(&self, offset: i64, length: usize) -> Series {
self.0.slice(offset, length).into_series()
}

fn append(&mut self, other: &Series) -> Result<()> {
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/series/implementations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -513,8 +513,8 @@ macro_rules! impl_dyn_series {
self.0.append_array(other)
}

fn slice(&self, offset: i64, length: usize) -> Result<Series> {
return self.0.slice(offset, length).map(|ca| ca.into_series());
fn slice(&self, offset: i64, length: usize) -> Series {
return self.0.slice(offset, length).into_series();
}

fn append(&mut self, other: &Series) -> Result<()> {
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/series/implementations/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ where
ObjectChunked::append_array(&mut self.0, other)
}

fn slice(&self, offset: i64, length: usize) -> Result<Series> {
ObjectChunked::slice(&self.0, offset, length).map(|ca| ca.into_series())
fn slice(&self, offset: i64, length: usize) -> Series {
ObjectChunked::slice(&self.0, offset, length).into_series()
}

fn append(&mut self, other: &Series) -> Result<()> {
Expand Down
16 changes: 8 additions & 8 deletions polars/polars-core/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,15 +353,15 @@ pub trait SeriesTrait: Send + Sync + private::PrivateSeries {
}

/// Take `num_elements` from the top as a zero copy view.
fn limit(&self, num_elements: usize) -> Result<Series> {
fn limit(&self, num_elements: usize) -> Series {
self.slice(0, num_elements)
}

/// Get a zero copy view of the data.
///
/// When offset is negative the offset is counted from the
/// end of the array
fn slice(&self, _offset: i64, _length: usize) -> Result<Series> {
fn slice(&self, _offset: i64, _length: usize) -> Series {
unimplemented!()
}

Expand Down Expand Up @@ -1535,9 +1535,9 @@ mod test {
fn series_slice_works() {
let series = Series::new("a", &[1i64, 2, 3, 4, 5]);

let slice_1 = series.slice(-3, 3).unwrap();
let slice_2 = series.slice(-5, 5).unwrap();
let slice_3 = series.slice(0, 5).unwrap();
let slice_1 = series.slice(-3, 3);
let slice_2 = series.slice(-5, 5);
let slice_3 = series.slice(0, 5);

assert_eq!(slice_1.get(0), AnyValue::Int64(3));
assert_eq!(slice_2.get(0), AnyValue::Int64(1));
Expand All @@ -1548,8 +1548,8 @@ mod test {
fn out_of_range_slice_does_not_panic() {
let series = Series::new("a", &[1i64, 2, 3, 4, 5]);

series.slice(-3, 4).expect_err("Should be out of bounds");
series.slice(-6, 2).expect_err("Should be out of bounds");
series.slice(4, 2).expect_err("Should be out of bounds");
series.slice(-3, 4);
series.slice(-6, 2);
series.slice(4, 2);
}
}
2 changes: 1 addition & 1 deletion polars/polars-core/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ macro_rules! split_array {
};
$ca.slice((i * chunk_size) as $ty, len)
})
.collect::<Result<_>>()?;
.collect();
Ok(v)
}};
}
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-io/src/csv_core/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ impl<R: Read + Sync + Send> SequentialReader<R> {
// Let's slice to correct number of rows if possible.
if let Some(n_rows) = self.n_rows {
if n_rows < df.height() {
df = df.slice(0, n_rows).unwrap()
df = df.slice(0, n_rows)
}
}
Ok(df)
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/physical_plan/executors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@ pub struct SliceExec {
impl Executor for SliceExec {
fn execute(&mut self, cache: &Cache) -> Result<DataFrame> {
let df = self.input.execute(cache)?;
df.slice(self.offset, self.len)
Ok(df.slice(self.offset, self.len))
}
}
pub struct MeltExec {
Expand Down
22 changes: 2 additions & 20 deletions polars/polars-lazy/src/physical_plan/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -835,22 +835,10 @@ pub struct SliceExpr {
pub(crate) len: usize,
}

impl SliceExpr {
fn slice_series(&self, series: &Series) -> Result<Series> {
let series_len = series.len() as i64;
let offset = if self.offset >= 0 {
self.offset as i64
} else {
series_len - self.offset
};
series.slice(offset, self.len)
}
}

impl PhysicalExpr for SliceExpr {
fn evaluate(&self, df: &DataFrame) -> Result<Series> {
let series = self.input.evaluate(df)?;
self.slice_series(&series)
Ok(series.slice(self.offset, self.len))
}

fn to_field(&self, input_schema: &Schema) -> Result<Field> {
Expand All @@ -870,13 +858,7 @@ impl AggPhysicalExpr for SliceExpr {
s.list()
.unwrap()
.into_iter()
.map(|opt_s| match opt_s {
None => None,
Some(s) => {
let r = self.slice_series(&s);
r.ok()
}
})
.map(|opt_s| opt_s.map(|s| s.slice(self.offset, self.len)))
.collect::<ListChunked>()
.into_series()
});
Expand Down
7 changes: 3 additions & 4 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -466,12 +466,11 @@ impl PyDataFrame {
Ok(())
}

pub fn slice(&self, offset: usize, length: usize) -> PyResult<Self> {
pub fn slice(&self, offset: usize, length: usize) -> Self {
let df = self
.df
.slice(offset as i64, length)
.map_err(PyPolarsEr::from)?;
Ok(PyDataFrame::new(df))
.slice(offset as i64, length);
df.into()
}

pub fn head(&self, length: Option<usize>) -> Self {
Expand Down

0 comments on commit a5e1942

Please sign in to comment.