Skip to content

Commit

Permalink
melt operation and df! macro
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 3, 2020
1 parent 0454f3a commit b67f9ff
Show file tree
Hide file tree
Showing 9 changed files with 152 additions and 21 deletions.
19 changes: 9 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,22 +66,21 @@ Polars is written to be performant. Below are some comparisons with the (also ve

```rust
use polars::prelude::*;

/// Build a small temperature frame and a small rain frame, then left join
/// them on their shared `days` column.
///
/// NOTE(review): each frame is constructed twice below, once with
/// `Series::new` + `DataFrame::new` and once with `df!`, and the second `let`
/// shadows the first. This looks like the removed and added lines of the diff
/// shown together; confirm against the rendered README.
fn join() -> Result<DataFrame> {
// Create first df.
let s0 = Series::new("days", &[0, 1, 2, 3, 4]);
let s1 = Series::new("temp", &[22.1, 19.9, 7., 2., 3.]);
let temp = DataFrame::new(vec![s0, s1])?;

let temp = df!("days" => &[0, 1, 2, 3, 4],
"temp" => &[22.1, 19.9, 7., 2., 3.])?;

// Create second df.
let s0 = Series::new("days", &[1, 2]);
let s1 = Series::new("rain", &[0.1, 0.2]);
let rain = DataFrame::new(vec![s0, s1])?;

let rain = df!("days" => &[1, 2],
"rain" => &[0.1, 0.2])?;

// Left join on days column.
temp.left_join(&rain, "days", "days")
}
println!("{}", join().unwrap());

println!("{:?}", join().unwrap());
```

```text
Expand Down
2 changes: 2 additions & 0 deletions polars/src/doc/changelog/v0_7.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@
//! - group indexes
//! - agg (combined aggregations)
//! * explode operation
//! * melt operation
//! * df! macro
72 changes: 72 additions & 0 deletions polars/src/frame/explode.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::frame::select::Selection;
use crate::prelude::*;
use std::collections::VecDeque;

impl LargeListChunked {
pub fn explode(&self) -> Result<(Series, Vec<usize>)> {
Expand Down Expand Up @@ -130,6 +132,60 @@ impl DataFrame {
Ok(self.clone())
}
}

/// Unpivot a `DataFrame` from wide to long format.
///
/// Every column selected by `value_vars` is stacked into a single `value` column. A
/// `variable` column holds the name of the column each block of values came from, and
/// the columns selected by `id_vars` are repeated for every block.
///
/// # Arguments
///
/// * `id_vars` - Selection of the columns to use as id variables.
/// * `value_vars` - Selection of the columns to use as value variables.
///
/// # Errors
///
/// Returns `PolarsError::NoData` when `value_vars` selects no columns. Errors raised
/// while selecting columns or stacking the intermediate frames are propagated.
///
/// # Example
///
/// ```rust
/// # #[macro_use] extern crate polars;
/// use polars::prelude::*;
/// let df = df!("A" => &["a", "b", "a"],
///              "B" => &[1, 3, 5],
///              "C" => &[10, 11, 12],
///              "D" => &[2, 4, 6]
///     )
///     .unwrap();
///
/// let melted = df.melt(&["A", "B"], &["C", "D"]).unwrap();
/// println!("{:?}", df);
/// println!("{:?}", melted);
/// ```
pub fn melt<'a, 'b, J, K, SelId: Selection<'a, J>, SelValue: Selection<'b, K>>(
    &self,
    id_vars: SelId,
    value_vars: SelValue,
) -> Result<Self> {
    let ids = self.select(id_vars)?;
    let value_vars = value_vars.to_selection_vec();
    let len = self.height();

    // One intermediate frame per value column: ids + "variable" + "value".
    let mut dataframe_chunks = VecDeque::with_capacity(value_vars.len());

    for value_column_name in value_vars {
        // Constant column that tags every row with the originating column name.
        let variable_col = Utf8Chunked::full("variable", value_column_name, len).into_series();
        // Rename the clone so all chunks share the same column name when stacked.
        let mut value_col = self.column(value_column_name)?.clone();
        value_col.rename("value");

        let mut df_chunk = ids.clone();
        df_chunk.hstack(&[variable_col, value_col])?;
        dataframe_chunks.push_back(df_chunk);
    }

    // The first chunk becomes the accumulator; no chunk at all means nothing to melt.
    let mut main_df = dataframe_chunks.pop_front().ok_or(PolarsError::NoData)?;

    // Append the remaining chunks in selection order.
    for df in dataframe_chunks {
        main_df.vstack(&df)?;
    }
    Ok(main_df)
}
}

#[cfg(test)]
Expand All @@ -151,4 +207,20 @@ mod test {
println!("{:?}", exploded);
assert_eq!(exploded.shape(), (9, 3));
}

#[test]
fn test_melt() {
    // Wide frame: "A" and "B" act as identifiers, "C" and "D" hold the values to stack.
    let wide = df!(
        "A" => &["a", "b", "a"],
        "B" => &[1, 3, 5],
        "C" => &[10, 11, 12],
        "D" => &[2, 4, 6]
    )
    .unwrap();

    let long = wide.melt(&["A", "B"], &["C", "D"]).unwrap();

    // The "value" column must hold column "C" followed by column "D".
    let values = Vec::from(long.column("value").unwrap().i32().unwrap());
    assert_eq!(
        values,
        &[Some(10), Some(11), Some(12), Some(2), Some(4), Some(6)]
    );
}
}
13 changes: 7 additions & 6 deletions polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,25 @@
//! ## Joins
//!
//! ```
//! # #[macro_use] extern crate polars;
//! # fn main() {
//! use polars::prelude::*;
//!
//! fn join() -> Result<DataFrame> {
//! // Create first df.
//! let s0 = Series::new("days", &[0, 1, 2, 3, 4]);
//! let s1 = Series::new("temp", &[22.1, 19.9, 7., 2., 3.]);
//! let temp = DataFrame::new(vec![s0, s1])?;
//! let temp = df!("days" => &[0, 1, 2, 3, 4],
//! "temp" => &[22.1, 19.9, 7., 2., 3.])?;
//!
//! // Create second df.
//! let s0 = Series::new("days", &[1, 2]);
//! let s1 = Series::new("rain", &[0.1, 0.2]);
//! let rain = DataFrame::new(vec![s0, s1])?;
//! let rain = df!("days" => &[1, 2],
//! "rain" => &[0.1, 0.2])?;
//!
//! // Left join on days column.
//! temp.left_join(&rain, "days", "days")
//! }
//!
//! println!("{}", join().unwrap())
//! # }
//! ```
//!
//! ```text
Expand Down
14 changes: 14 additions & 0 deletions polars/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,20 @@ macro_rules! apply_method_all_series_and_return {
}
}

/// Construct a `DataFrame` from `name => values` pairs.
///
/// Each pair becomes a column through `Series::new(name, values)`. The columns are passed,
/// in the order written, to `DataFrame::new`, and its result is returned unchanged.
/// `Series` and `DataFrame` are resolved at the call site, so both must be in scope
/// there (for example through `use polars::prelude::*;`).
///
/// A trailing comma after the last pair is accepted.
///
/// # Example
///
/// ```
/// # #[macro_use] extern crate polars;
/// # fn main() {
/// use polars::prelude::*;
///
/// let df = df!(
///     "days" => &[0, 1, 2],
///     "temp" => &[22.1, 19.9, 7.],
/// )
/// .unwrap();
/// println!("{:?}", df);
/// # }
/// ```
#[macro_export]
macro_rules! df {
    ($($col_name:expr => $slice:expr), + $(,)?) => {
        {
            // Build every column up front, preserving the order of the pairs.
            let columns = vec![$(Series::new($col_name, $slice)),+];
            DataFrame::new(columns)
        }
    }
}

/// Clone if upstream hasn't implemented clone
pub(crate) fn clone<T>(t: &T) -> T {
unsafe { mem::transmute_copy(t) }
Expand Down
24 changes: 24 additions & 0 deletions py-polars/pypolars/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,30 @@ def explode(self, column: str) -> DataFrame:
"""
return wrap_df(self._df.explode(column))

def melt(
    self, id_vars: Union[List[str], str], value_vars: Union[List[str], str]
) -> DataFrame:
    """
    Unpivot DataFrame to long format.

    Parameters
    ----------
    id_vars
        Columns to use as identifier variables
    value_vars
        Columns to use as value variables

    Returns
    -------
    DataFrame
        The unpivoted frame.
    """
    # A single column name is accepted as shorthand for a one-element list.
    if isinstance(value_vars, str):
        value_vars = [value_vars]
    if isinstance(id_vars, str):
        id_vars = [id_vars]
    return wrap_df(self._df.melt(id_vars, value_vars))


class GroupBy:
def __init__(self, df: DataFrame, by: List[str]):
Expand Down
15 changes: 10 additions & 5 deletions py-polars/pypolars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,21 @@ def __init__(
return

self._s: PySeries
# castable to numpy
if not isinstance(values, np.ndarray) and not nullable:
values = np.array(values)

# series path
if isinstance(values, Series):
self.from_pyseries(values)
return
elif isinstance(values, dict):
raise ValueError(
f"Constructing a Series with a dict is not supported for {values}"
)

# castable to numpy
if not isinstance(values, np.ndarray) and not nullable:
values = np.array(values)

# numpy path
elif isinstance(values, np.ndarray):
if isinstance(values, np.ndarray):
dtype = values.dtype
if dtype == np.int64:
self._s = PySeries.new_i64(name, values)
Expand Down
8 changes: 8 additions & 0 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,4 +372,12 @@ impl PyDataFrame {
let df = df.map_err(PyPolarsEr::from)?;
Ok(PyDataFrame::new(df))
}

/// Forward to the wrapped frame's `melt` and wrap the result in a new `PyDataFrame`.
///
/// A failure is converted through `PyPolarsEr` into the error type of `PyResult`.
pub fn melt(&self, id_vars: Vec<&str>, value_vars: Vec<&str>) -> PyResult<Self> {
    match self.df.melt(id_vars, value_vars) {
        Ok(melted) => Ok(PyDataFrame::new(melted)),
        Err(err) => Err(PyPolarsEr::from(err).into()),
    }
}
}
6 changes: 6 additions & 0 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,9 @@ def test_set():
df = DataFrame({"foo": np.random.rand(10), "bar": np.arange(10), "ham": ["h"] * 10})
df["new"] = np.random.rand(10)
df[df["new"] > 0.5, "new"] = 1


def test_melt():
    df = DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]})
    melted = df.melt(id_vars="A", value_vars=["B", "C"])
    # "value" holds column B ([1, 3, 5]) followed by column C ([2, 4, 6]).
    # NOTE(review): if `==` on a Series is element-wise, this assert only checks the
    # truthiness of the resulting mask; confirm and tighten if so.
    assert melted["value"] == [1, 3, 5, 2, 4, 6]

0 comments on commit b67f9ff

Please sign in to comment.