Skip to content

Commit

Permalink
[python] fill_none
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 19, 2020
1 parent 7336460 commit c1fa84f
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 17 deletions.
7 changes: 0 additions & 7 deletions examples/10_minutes_to_pypolars.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1606,13 +1606,6 @@
"metadata": {},
"outputs": [],
"source": [
" >\n",
"Terminal Sessions\n",
"\n",
"Kernel Sessions\n",
"\n",
" 10_minutes_to_pypolars.ipynb\n",
"\n",
"df = pl.DataFrame(df.to_dict(orient=\"list\"))"
]
},
Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
polars = {path = "../polars", features = ["parquet", "simd"]}
polars = {path = "../polars", features = ["parquet", "simd", "parallel"]}
pyo3 = {version = "0.11", features = ["extension-module"] }
thiserror = "1.0.20"
numpy = "0.11"
Expand Down
65 changes: 56 additions & 9 deletions py-polars/pypolars/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,16 +293,44 @@ def groupby(self, by: Union[str, List[str]]) -> GroupBy:
return GroupBy(self._df, by)

def join(
    self,
    df: DataFrame,
    left_on: str,
    right_on: str,
    how: str = "inner",
    parallel: bool = False,
) -> DataFrame:
    """
    SQL like joins.

    Parameters
    ----------
    df
        DataFrame to join with
    left_on
        Name of the left join column
    right_on
        Name of the right join column
    how
        Join strategy
            - "inner"
            - "left"
            - "outer"
    parallel
        Use parallel join strategy.

    Returns
    -------
    Joined DataFrame, or ``NotImplemented`` when ``how`` is not one of the
    strategies listed above.
    """
    self._df.with_parallel(parallel)
    try:
        if how == "inner":
            inner = self._df.inner_join(df._df, left_on, right_on)
        elif how == "left":
            inner = self._df.left_join(df._df, left_on, right_on)
        elif how == "outer":
            inner = self._df.outer_join(df._df, left_on, right_on)
        else:
            return NotImplemented
    finally:
        # Clear the flag on every exit path (success, unsupported `how`, or
        # an exception) so that a parallel join does not leave the frame in
        # parallel mode for whatever is called on it next.
        self._df.with_parallel(False)
    return wrap_df(inner)

def hstack(self, columns: List[Series]):
Expand All @@ -329,6 +357,25 @@ def clone(self) -> DataFrame:
def get_columns(self) -> List[Series]:
    """
    Return the columns of this DataFrame, each passed through `wrap_s`.
    """
    return [wrap_s(column) for column in self._df.get_columns()]

def fill_none(self, strategy: str) -> DataFrame:
    """
    Fill None values by a filling strategy.

    Parameters
    ----------
    strategy
        Name of the filling strategy
            - "backward"
            - "forward"
            - "mean"
            - "min"
            - "max"

    Returns
    -------
    DataFrame with None replaced with the filling strategy.
    """
    filled = self._df.fill_none(strategy)
    return wrap_df(filled)


class GroupBy:
def __init__(self, df: DataFrame, by: List[str]):
Expand Down
3 changes: 3 additions & 0 deletions py-polars/pypolars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,3 +541,6 @@ def clone(self) -> Series:
Cheap deep clones
"""
return wrap_s(self._s.clone())

def fill_none(self, strategy: str) -> Series:
    """
    Fill None values by a filling strategy.

    Parameters
    ----------
    strategy
        Name of the filling strategy; it is passed unchanged to the
        underlying series' `fill_none`.

    Returns
    -------
    Series wrapping the result of the underlying `fill_none` call.
    """
    filled = self._s.fill_none(strategy)
    return wrap_s(filled)
17 changes: 17 additions & 0 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,23 @@ impl PyDataFrame {
format!("{:?}", self.df)
}

/// Forwards `parallel` to the wrapped DataFrame's `with_parallel` method.
///
/// The Python `join` wrapper in this commit calls this before running a join
/// to select the parallel join strategy.
pub fn with_parallel(&mut self, parallel: bool) {
self.df.with_parallel(parallel);
}

/// Fills missing values in the wrapped DataFrame and returns the result as a
/// new `PyDataFrame`.
///
/// `strategy` must be one of `"backward"`, `"forward"`, `"min"`, `"max"` or
/// `"mean"`.
///
/// # Errors
///
/// Returns an error when `strategy` is any other string, or when the
/// underlying `fill_none` call fails.
pub fn fill_none(&self, strategy: &str) -> PyResult<Self> {
    let fill_strategy = match strategy {
        "forward" => FillNoneStrategy::Forward,
        "backward" => FillNoneStrategy::Backward,
        "mean" => FillNoneStrategy::Mean,
        "min" => FillNoneStrategy::Min,
        "max" => FillNoneStrategy::Max,
        unsupported => {
            let message = format!("Strategy {} not supported", unsupported);
            return Err(PyPolarsEr::Other(message).into());
        }
    };
    let filled = self
        .df
        .fill_none(fill_strategy)
        .map_err(PyPolarsEr::from)?;
    Ok(PyDataFrame::new(filled))
}

pub fn inner_join(&self, other: &PyDataFrame, left_on: &str, right_on: &str) -> PyResult<Self> {
let df = self
.df
Expand Down
13 changes: 13 additions & 0 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,19 @@ impl PySeries {
self.series.as_single_ptr()
}

/// Fills missing values in the wrapped Series and returns the result as a
/// new `PySeries`.
///
/// `strategy` must be one of `"backward"`, `"forward"`, `"min"`, `"max"` or
/// `"mean"`.
///
/// # Errors
///
/// Returns an error when `strategy` is any other string, or when the
/// underlying `fill_none` call fails.
pub fn fill_none(&self, strategy: &str) -> PyResult<Self> {
    let fill_strategy = match strategy {
        "forward" => FillNoneStrategy::Forward,
        "backward" => FillNoneStrategy::Backward,
        "mean" => FillNoneStrategy::Mean,
        "min" => FillNoneStrategy::Min,
        "max" => FillNoneStrategy::Max,
        unsupported => {
            let message = format!("Strategy {} not supported", unsupported);
            return Err(PyPolarsEr::Other(message).into());
        }
    };
    let filled = self
        .series
        .fill_none(fill_strategy)
        .map_err(PyPolarsEr::from)?;
    Ok(PySeries::new(filled))
}

/// Attempts to copy data to numpy arrays. If integer types have missing values
/// they will be cast to floating point values, where NaNs are used to represent missing values.
/// Strings will be converted to python lists and booleans will be a numpy array if there are no
Expand Down
6 changes: 6 additions & 0 deletions py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,9 @@ def test_get():
a = Series("a", [1, 2, 3])
assert a[0] == 1
assert a[:2] == [1, 2]


def test_fill_none():
    """Forward filling [1, 2, None] is expected to give [1, 2, 2]."""
    with_gap = Series("a", [1, 2, None], nullable=True)
    filled = with_gap.fill_none("forward")
    assert filled == [1, 2, 2]

0 comments on commit c1fa84f

Please sign in to comment.