Skip to content

Commit

Permalink
[python] pivot
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 17, 2020
1 parent 4830e61 commit 5f7a6a3
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 1 deletion.
2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ ndarray = "0.13.1"


[lib]
name = "polars"
name = "pypolars"
crate-type = ["cdylib"]


Expand Down
41 changes: 41 additions & 0 deletions py-polars/polars/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ def __getitem__(self, item):
if isinstance(item, tuple):
row_selection, col_selection = item

if isinstance(row_selection, slice):
series = self.__getitem__(col_selection)
wrap_s(series[row_selection])

# column selection can be "a" and ["a", "b"]
if isinstance(col_selection, str):
col_selection = [col_selection]
Expand All @@ -88,6 +92,9 @@ def __getitem__(self, item):
if isinstance(item, str):
return wrap_s(self._df.column(item))

if isinstance(item, int):
return wrap_s(self._df.select_at_idx(item))

# select multiple columns
if isinstance(item, Sequence) and isinstance(item[0], str):
return wrap_df(self._df.select(item))
Expand All @@ -105,6 +112,7 @@ def __getitem__(self, item):
return wrap_df(self._df.filter(item.inner()))
if dtype == "u32":
return wrap_df(self._df.take_with_series(item.inner()))
return NotImplemented

def __len__(self):
return self.height
Expand Down Expand Up @@ -189,6 +197,39 @@ def select(self, columns: Union[str, List[str]]) -> GBSelection:
columns = [columns]
return GBSelection(self._df, self.by, columns)

def pivot(self, pivot_column: str, values_column: str) -> PivotOps:
    """
    Prepare a pivot on this grouping.

    Parameters
    ----------
    pivot_column
        Name passed on as the pivot column.
    values_column
        Name passed on as the values column.

    Returns
    -------
    A PivotOps built from this object's frame and group keys together with
    the two column names given here.
    """
    return PivotOps(
        self._df,
        self.by,
        pivot_column=pivot_column,
        values_column=values_column,
    )


class PivotOps:
    """
    Pivot request that is executed by calling one of the aggregation methods.

    The instance stores the frame, the group keys, the pivot column and the
    values column. Each aggregation method forwards those to the frame's
    ``pivot`` method together with the aggregation name and returns the
    result wrapped by ``wrap_df``.
    """

    def __init__(
        self, df: DataFrame, by: List[str], pivot_column: str, values_column: str
    ):
        # `df` must expose `pivot(by, pivot_column, values_column, agg)`;
        # that is the only method this class calls on it.
        self._df = df
        self.by = by
        self.pivot_column = pivot_column
        self.values_column = values_column

    def _aggregate(self, agg: str):
        """Run the pivot with aggregation name `agg` and return the wrapped result."""
        return wrap_df(
            self._df.pivot(self.by, self.pivot_column, self.values_column, agg)
        )

    def first(self):
        """Run the pivot with the "first" aggregation and return the result."""
        return self._aggregate("first")

    def sum(self):
        """Run the pivot with the "sum" aggregation and return the result."""
        return self._aggregate("sum")

    def min(self):
        """Run the pivot with the "min" aggregation and return the result."""
        return self._aggregate("min")

    def max(self):
        """Run the pivot with the "max" aggregation and return the result."""
        return self._aggregate("max")

    def mean(self):
        """Run the pivot with the "mean" aggregation and return the result."""
        return self._aggregate("mean")

    def median(self):
        """Run the pivot with the "median" aggregation and return the result."""
        return self._aggregate("median")


class GBSelection:
def __init__(self, df: DataFrame, by: List[str], selection: List[str]):
Expand Down
7 changes: 7 additions & 0 deletions py-polars/polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,3 +445,10 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):

else:
return NotImplemented

def to_numpy(self) -> np.ndarray:
    """
    Convert this series to a numpy array.

    The inner series' `to_numpy` result is returned unchanged, except when it
    is a plain Python list (the case for strings), which is converted with
    `np.array` first.
    """
    raw = self._s.to_numpy()
    return np.array(raw) if isinstance(raw, list) else raw
22 changes: 22 additions & 0 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,4 +263,26 @@ impl PyDataFrame {
let df = df.map_err(PyPolarsEr::from)?;
Ok(PyDataFrame::new(df))
}

/// Group by `by`, pivot `values_column` over `pivot_column`, and aggregate.
///
/// `agg` selects the aggregation by name: one of `"first"`, `"sum"`,
/// `"min"`, `"max"`, `"mean"` or `"median"`.
///
/// # Errors
///
/// Returns an error if the group-by fails, if the chosen aggregation
/// fails, or if `agg` is not one of the names listed above.
pub fn pivot(
    &self,
    by: Vec<String>,
    pivot_column: &str,
    values_column: &str,
    agg: &str,
) -> PyResult<Self> {
    let mut grouped = self.df.groupby(&by).map_err(PyPolarsEr::from)?;
    let pivoted = grouped.pivot(pivot_column, values_column);

    // Dispatch on the aggregation name, then convert any polars error once.
    let aggregated = match agg {
        "first" => pivoted.first(),
        "sum" => pivoted.sum(),
        "min" => pivoted.min(),
        "max" => pivoted.max(),
        "mean" => pivoted.mean(),
        "median" => pivoted.median(),
        unknown => Err(PolarsError::Other(format!(
            "agg fn {} does not exists",
            unknown
        ))),
    }
    .map_err(PyPolarsEr::from)?;

    Ok(PyDataFrame::new(aggregated))
}
}

0 comments on commit 5f7a6a3

Please sign in to comment.