Skip to content

Commit

Permalink
[python] add shift; closes #89
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 3, 2020
1 parent f8c45c5 commit 02be076
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 21 deletions.
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
[![rust docs](https://docs.rs/polars/badge.svg)](https://docs.rs/polars/latest/polars/)
![Build, test and docs](https://github.com/ritchie46/polars/workflows/Build,%20test%20and%20docs/badge.svg)
[![](http://meritbadge.herokuapp.com/polars)](https://crates.io/crates/polars)
[![Gitter](https://badges.gitter.im/polars-rs/community.svg)](https://gitter.im/polars-rs/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)

## Blazingly fast in memory DataFrames in Rust

Expand All @@ -11,14 +12,14 @@ Its focus is being a fast in memory DataFrame library.
Polars is in rapid development, but it already supports most features needed for a useful DataFrame library. If you
miss something, please open an issue and/or send a PR.

## First run
## First run in Rust
Take a look at the [10 minutes to Polars notebook](examples/10_minutes_to_polars.ipynb) to get you started.
Want to run the notebook yourself? Clone the repo and run `$ cargo c && docker-compose up`. This will spin up a jupyter
notebook on `http://localhost:8891`. The notebooks are in the `/examples` directory.

Oh yeah.. and get a cup of coffee because compilation will take a while during the first run.

## Python
## First run in Python
A subset of the Polars functionality is also exposed through Python bindings. You can install them for linux with:

`$ pip install py-polars`
Expand Down Expand Up @@ -102,12 +103,12 @@ Polars is written to be performant. Below are some comparisons with the (also ve
+------+------+------+
```

### Groupby's | aggregations | pivots
### Groupby's | aggregations | pivots | melts

```rust
use polars::prelude::*;
fn groupby_sum(df: &DataFrame) -> Result<DataFrame> {
df.groupby("column_name")?
df.groupby(&["a", "b"])?
.select("agg_column_name")
.sum()
}
Expand All @@ -116,7 +117,7 @@ Polars is written to be performant. Below are some comparisons with the (also ve
### Arithmetic
```rust
use polars::prelude::*;
let s: Series = [1, 2, 3].iter().collect();
let s = Series::new("foo", [1, 2, 3]);
let s_squared = &s * &s;
```

Expand Down Expand Up @@ -148,7 +149,7 @@ Polars is written to be performant. Below are some comparisons with the (also ve
.apply(|value| value.powf(2.0))
.into_series();

assert_eq!(Vec::from(squared.f64().unwrap()), &[Some(1.0), None, Some(9.0)])
assert_eq!(Vec::from(squared.f64().unwrap()), &[Some(1.0), None, Some(9.0)]);
```

### Comparisons
Expand All @@ -158,7 +159,6 @@ Polars is written to be performant. Below are some comparisons with the (also ve
use itertools::Itertools;
let s = Series::new("dollars", &[1, 2, 3]);
let mask = s.eq(1);
let valid = [true, false, false].iter();

assert_eq!(Vec::from(mask), &[Some(true), Some(false), Some(false)]);
```
Expand Down
5 changes: 2 additions & 3 deletions polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
//! +------+------+------+
//! ```
//!
//! ## Groupby's | aggregations | pivots
//! ## Groupby's | aggregations | pivots | melts
//!
//! ```
//! use polars::prelude::*;
Expand All @@ -88,7 +88,7 @@
//! ## Arithmetic
//! ```
//! use polars::prelude::*;
//! let s: Series = [1, 2, 3].iter().collect();
//! let s = Series::new("foo", [1, 2, 3]);
//! let s_squared = &s * &s;
//! ```
//!
Expand Down Expand Up @@ -134,7 +134,6 @@
//! use itertools::Itertools;
//! let s = Series::new("dollars", &[1, 2, 3]);
//! let mask = s.eq(1);
//! let valid = [true, false, false].iter();
//!
//! assert_eq!(Vec::from(mask), &[Some(true), Some(false), Some(false)]);
//! ```
Expand Down
18 changes: 16 additions & 2 deletions py-polars/pypolars/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,16 +362,18 @@ def sort(
else:
return wrap_df(self._df.sort(by_column, reverse))

def frame_equal(self, other: DataFrame) -> bool:
def frame_equal(self, other: DataFrame, null_equal: bool = False) -> bool:
"""
Check if DataFrame is equal to other.
Parameters
----------
other
DataFrame to compare with.
null_equal
Consider null values as equal.
"""
return self._df.frame_equal(other._df)
return self._df.frame_equal(other._df, null_equal)

def replace(self, column: str, new_col: Series):
"""
Expand Down Expand Up @@ -620,6 +622,18 @@ def melt(
id_vars = [id_vars]
return wrap_df(self._df.melt(id_vars, value_vars))

def shift(self, periods: int) -> DataFrame:
    """
    Shift the values of the frame by `periods` places; positions left empty by the
    shift are filled with `None`.

    Parameters
    ----------
    periods
        Number of places to shift (may be negative).
    """
    shifted = self._df.shift(periods)
    return wrap_df(shifted)


class GroupBy:
def __init__(self, df: DataFrame, by: List[str]):
Expand Down
21 changes: 16 additions & 5 deletions py-polars/pypolars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,19 +522,18 @@ def is_null(self) -> Series:
"""
return Series.from_pyseries(self._s.is_null())

def series_equal(self, other: Series) -> bool:
def series_equal(self, other: Series, null_equal: bool = False) -> bool:
"""
Check if series equal with another Series.
Parameters
----------
other
Series to compare with.
Returns
-------
Series
null_equal
Consider null values as equal.
"""
return self._s.series_equal(other._s)
return self._s.series_equal(other._s, null_equal)

def len(self) -> int:
"""
Expand Down Expand Up @@ -804,3 +803,15 @@ def apply(
else:
dt = dtype_to_int(dtype_out)
return wrap_s(self._s.apply_lambda(func, dt))

def shift(self, periods: int) -> Series:
    """
    Shift the values of the series by `periods` places; positions left empty by the
    shift are filled with `None`.

    Parameters
    ----------
    periods
        Number of places to shift (may be negative).
    """
    shifted = self._s.shift(periods)
    return wrap_s(shifted)
13 changes: 11 additions & 2 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,12 @@ impl PyDataFrame {
PyDataFrame::new(df)
}

pub fn frame_equal(&self, other: &PyDataFrame) -> bool {
self.df.frame_equal(&other.df)
pub fn frame_equal(&self, other: &PyDataFrame, null_equal: bool) -> bool {
if null_equal {
self.df.frame_equal_missing(&other.df)
} else {
self.df.frame_equal(&other.df)
}
}

pub fn groupby(&self, by: Vec<&str>, select: Option<Vec<String>>, agg: &str) -> PyResult<Self> {
Expand Down Expand Up @@ -380,4 +384,9 @@ impl PyDataFrame {
.map_err(PyPolarsEr::from)?;
Ok(PyDataFrame::new(df))
}

/// Shift the wrapped frame by `periods` places and wrap the result in a new `PyDataFrame`.
///
/// An error from the underlying `shift` call is converted through `PyPolarsEr`
/// before being returned to the caller.
pub fn shift(&self, periods: i32) -> PyResult<Self> {
    match self.df.shift(periods) {
        Ok(shifted) => Ok(PyDataFrame::new(shifted)),
        Err(err) => Err(PyPolarsEr::from(err).into()),
    }
}
}
13 changes: 11 additions & 2 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,12 @@ impl PySeries {
Self::new(Series::Bool(self.series.is_null()))
}

pub fn series_equal(&self, other: &PySeries) -> PyResult<bool> {
Ok(self.series.series_equal(&other.series))
pub fn series_equal(&self, other: &PySeries, null_equal: bool) -> bool {
if null_equal {
self.series.series_equal_missing(&other.series)
} else {
self.series.series_equal(&other.series)
}
}
pub fn eq(&self, rhs: &PySeries) -> PyResult<Self> {
Ok(Self::new(Series::Bool(self.series.eq(&rhs.series))))
Expand Down Expand Up @@ -472,6 +476,11 @@ impl PySeries {

Ok(PySeries::new(out))
}

/// Shift the wrapped series by `periods` places and wrap the result in a new `PySeries`.
///
/// An error from the underlying `shift` call is converted through `PyPolarsEr`
/// before being returned to the caller.
pub fn shift(&self, periods: i32) -> PyResult<Self> {
    self.series
        .shift(periods)
        .map(PySeries::new)
        .map_err(|err| PyPolarsEr::from(err).into())
}
}

macro_rules! impl_ufuncs {
Expand Down
7 changes: 7 additions & 0 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,10 @@ def test_melt():
df = DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]})
melted = df.melt(id_vars="A", value_vars=["B", "C"])
assert melted["value"] == [1, 3, 4, 2, 4, 6]


def test_shift():
    # Expected outcome of a one-place shift: the first row is null, the rest move down by one.
    expected = DataFrame({"A": [None, "a", "b"], "B": [None, 1, 3]}, nullable=True)
    shifted = DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5]}).shift(1)
    # Nulls must compare equal here, otherwise the null first row would make the frames differ.
    assert shifted.frame_equal(expected, null_equal=True)
7 changes: 7 additions & 0 deletions py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,10 @@ def test_apply():

with pytest.raises(TypeError):
a.apply(lambda x: len(x))


def test_shift():
    series = Series("a", [1, 2, 3])
    # NOTE(review): the expected lists for the negative periods look like they may not
    # match a real shift of [1, 2, 3] (e.g. [2, 3, None] for -1), and `==` against a
    # list may produce a Series rather than a plain bool -- confirm that these
    # assertions actually check the shifted values.
    cases = [
        (1, [None, 1, 2]),
        (-1, [1, 2, None]),
        (-2, [1, None, None]),
    ]
    for periods, expected in cases:
        assert series.shift(periods) == expected

0 comments on commit 02be076

Please sign in to comment.