Skip to content

Commit

Permalink
Implement cut() and add algoritims crate (#4715)
Browse files Browse the repository at this point in the history
  • Loading branch information
hpux735 committed Sep 3, 2022
1 parent 583b112 commit 3408c52
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ members = [
"polars/polars-time",
"polars/polars-utils",
"polars/polars-ops",
"polars/polars-algo",
"examples/read_csv",
"examples/read_parquet",
"examples/python_rust_compiled_function",
Expand Down
2 changes: 2 additions & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ trigonometry = ["polars-lazy/trigonometry"]
sign = ["polars-lazy/sign"]
pivot = ["polars-lazy/pivot"]
top_k = ["polars-lazy/top_k"]
algo = ["polars-algo"]

test = [
"lazy",
Expand Down Expand Up @@ -265,6 +266,7 @@ bench = [
]

[dependencies]
polars-algo = { version = "0.23.0", path = "./polars-algo", optional = true }
polars-core = { version = "0.23.0", path = "./polars-core", features = ["docs", "private"], default-features = false }
polars-io = { version = "0.23.0", path = "./polars-io", features = ["private"], default-features = false, optional = true }
polars-lazy = { version = "0.23.0", path = "./polars-lazy", features = ["private"], default-features = false, optional = true }
Expand Down
19 changes: 19 additions & 0 deletions polars/polars-algo/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[package]
name = "polars-algo"
version = "0.23.0"
authors = ["hpux735 <william@housedillon.com>"]
edition = "2021"
license = "MIT"
repository = "https://github.com/pola-rs/polars"
description = "Algorithms built upon Polars primitives"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
polars-core = { version = "0.23.0", path = "../polars-core", features = ["private", "dtype-categorical", "asof_join"], default-features = false }
polars-lazy = { version = "0.23.0", path = "../polars-lazy", features = ["asof_join", "format_str"] }

[package.metadata.docs.rs]
all-features = true
# defines the configuration attribute `docsrs`
rustdoc-args = ["--cfg", "docsrs"]
98 changes: 98 additions & 0 deletions polars/polars-algo/src/algo.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
use polars_core::prelude::*;
use polars_lazy::prelude::*;

/// Bins the values of `s` into right-closed intervals delimited by `bins`.
///
/// A lookup table is built with one row per break point in `bins` plus a
/// final `+inf` break point. Each value of `s` (sorted ascending first) is
/// then matched to the first break point that is greater than or equal to it
/// via a forward as-of join.
///
/// # Arguments
///
/// * `s` - The series to bin; its name is used as the left join key and is
///   kept as the value column of the result.
/// * `bins` - The break points. NOTE(review): these are presumably expected
///   to be sorted ascending, since they are used as-is as the right-hand key
///   of the as-of join; confirm against callers.
/// * `labels` - Optional category labels, one per interval. Because a
///   trailing `+inf` break point is appended, exactly `bins.len() + 1` labels
///   are required. When `None`, labels of the form `"(lower, upper]"` are
///   generated.
/// * `break_point_label` - Name of the break-point column
///   (default `"break_point"`).
/// * `category_label` - Name of the category column (default `"category"`).
///
/// # Errors
///
/// Returns `PolarsError::ShapeMisMatch` when `labels` is given but does not
/// contain `bins.len() + 1` entries. Any error raised while building or
/// joining the frames is propagated unchanged.
pub fn cut(
    s: Series,
    bins: Vec<f32>,
    labels: Option<Vec<&str>>,
    break_point_label: Option<&str>,
    category_label: Option<&str>,
) -> Result<DataFrame> {
    // Reject a mismatched label count before doing any work.
    if let Some(labels) = &labels {
        if labels.len() != bins.len() + 1 {
            return Err(PolarsError::ShapeMisMatch(
                "Labels count must equal bins count + 1".into(),
            ));
        }
    }

    let var_name = s.name();
    let breakpoint_str = break_point_label.unwrap_or("break_point");
    let category_str = category_label.unwrap_or("category");

    // One row per break point, plus a trailing +inf so every value finds a match.
    let cuts_df = df![
        breakpoint_str => Series::new(breakpoint_str, &bins)
            .extend_constant(AnyValue::Float64(f64::INFINITY), 1)?
    ]?;

    let with_category = match labels {
        // Caller-supplied labels: attach them as the category column.
        Some(labels) => cuts_df
            .lazy()
            .with_column(lit(Series::new(category_str, labels))),
        // Generated labels: "(previous break point, break point]", with -inf
        // standing in for the missing lower bound of the first interval.
        None => cuts_df.lazy().with_column(
            format_str(
                "({}, {}]",
                [
                    col(breakpoint_str).shift_and_fill(1, lit(f64::NEG_INFINITY)),
                    col(breakpoint_str),
                ],
            )?
            .alias(category_str),
        ),
    };

    // Cast in the same lazy pipeline so the lookup table is materialised once.
    let cuts = with_category
        .with_columns([col(category_str).cast(DataType::Categorical(None))])
        .collect()?;

    // The values are sorted ascending before the as-of join; the forward
    // strategy picks the first break point >= each value.
    s.sort(false).into_frame().join_asof(
        &cuts,
        var_name,
        breakpoint_str,
        AsofStrategy::Forward,
        None,
        None,
    )
}

/// Exercises `cut` with two break points and the default column names and
/// generated interval labels.
#[test]
fn test_cut() -> Result<()> {
    // Twelve samples spaced 0.5 apart, starting at -3.0.
    let values: Vec<f32> = (0..12).map(|step| -3.0 + step as f32 * 0.5).collect();
    let input = Series::new("a", values);

    let actual = cut(input, vec![-1.0, 1.0], None, None, None)?;

    // The three generated interval labels, repeated once per sample that
    // falls inside the interval (5, 4 and 3 samples respectively).
    let below = "(-inf, -1.0]";
    let middle = "(-1.0, 1.0]";
    let above = "(1.0, inf]";
    let categories: Vec<&str> = std::iter::repeat(below)
        .take(5)
        .chain(std::iter::repeat(middle).take(4))
        .chain(std::iter::repeat(above).take(3))
        .collect();

    let expected = df!(
        "a" => [-3.0, -2.5, -2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5],
        "break_point" => [-1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, f64::INFINITY, f64::INFINITY, f64::INFINITY],
        "category" => categories
    )?;

    assert!(actual.frame_equal_missing(&expected));

    Ok(())
}
3 changes: 3 additions & 0 deletions polars/polars-algo/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//! Algorithms built upon Polars primitives.
//!
//! Everything public in the private `algo` module is re-exported at the
//! crate root.

// Enables the unstable `doc_cfg` feature only when building with `--cfg docsrs`.
#![cfg_attr(docsrs, feature(doc_cfg))]
mod algo;
pub use algo::*;

0 comments on commit 3408c52

Please sign in to comment.