Skip to content

Commit

Permalink
[js] init DataFrame
Browse files Browse the repository at this point in the history
Cannot accept `Vec<T>` so we need to assign `Series`
  • Loading branch information
ritchie46 committed Apr 15, 2021
1 parent 897ef3e commit a248b92
Show file tree
Hide file tree
Showing 11 changed files with 146 additions and 38 deletions.
2 changes: 1 addition & 1 deletion examples/iris_classifier/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ fn one_hot_encode(mut df: DataFrame) -> Result<DataFrame> {
.collect::<ListChunked>()
.into_series();
ohe.rename("ohe");
df.add_column(ohe)?;
df.with_column(ohe)?;

Ok(df)
}
Expand Down
6 changes: 1 addition & 5 deletions js-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@ documentation = "https://ritchie46.github.io/polars-book/"
homepage = "https://github.com/ritchie46/polars"

[lib]
crate-type = ["cdylib", "rlib"]
crate-type = ["cdylib"]

[features]

[dependencies]
wasm-bindgen = "0.2.34"

wee_alloc = { version = "0.4.5" }

[dependencies.polars-core]
Expand All @@ -27,9 +26,6 @@ features = [
"performant",
]

[dev-dependencies]
wasm-bindgen-test = "0.3.13"

[profile.release]
# Tell `rustc` to optimize for small code size.
opt-level = "s"
Expand Down
8 changes: 8 additions & 0 deletions js-polars/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,11 @@ const assert = require("assert")
// Build a small Series and exercise the methods exposed by the wasm bindings.
let s = new pl.Series("a", [1, 2, 3])
assert(s.mean() === 2)
console.log(s.mean())
console.log(s.toString())
console.log(s.toJSON())
s.log()

// `assign` does not modify the receiver: it returns a new DataFrame with the
// column added, so the result has to be kept for the column to show up below.
let df = new pl.DataFrame();
df = df.assign(s);

console.log(df)
35 changes: 35 additions & 0 deletions js-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use polars_core::prelude::DataFrame as PDataFrame;
use polars_core::prelude::Series as PSeries;
use super::{
JsPolarsError,
series::*,
};
use wasm_bindgen::prelude::*;
use wasm_bindgen::JsCast;

/// Handle exported through `wasm_bindgen` that owns a polars `DataFrame`.
#[wasm_bindgen]
pub struct DataFrame {
    // The wrapped polars frame; the field is private to this module.
    df: PDataFrame,
}

/// Wraps a polars `DataFrame` in the exported handle type.
impl From<PDataFrame> for DataFrame {
    fn from(inner: PDataFrame) -> Self {
        DataFrame { df: inner }
    }
}


#[wasm_bindgen]
impl DataFrame {
    /// Create a `DataFrame` that holds no columns.
    ///
    /// Exposed as the JavaScript constructor (`new pl.DataFrame()`).
    #[wasm_bindgen(constructor)]
    pub fn new() -> Self {
        PDataFrame::new_no_checks(vec![]).into()
    }

    /// Return a copy of this frame with `series` added, or with the column of
    /// the same name replaced.
    ///
    /// The series is taken by reference and its inner polars series is cloned.
    /// Taking an exported type by value would move it across the wasm boundary
    /// and leave the caller's JavaScript `Series` object unusable afterwards.
    ///
    /// # Errors
    ///
    /// Any error reported by the underlying `with_column` call is returned as
    /// a `JsValue` produced by the `JsPolarsError` conversion.
    pub fn assign(&self, series: &Series) -> Result<DataFrame, JsValue> {
        // Work on a clone so the receiver stays untouched.
        let mut df = self.df.clone();
        df.with_column(series.series.clone())
            .map_err(JsPolarsError::from)?;
        Ok(df.into())
    }
}
32 changes: 32 additions & 0 deletions js-polars/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,36 @@
mod series;
mod dataframe;

use wasm_bindgen::prelude::*;
use polars_core::error::PolarsError;

// Import a JavaScript function so Rust code in this crate can call it as `log`.
#[wasm_bindgen]
extern "C" {
// Resolved inside the JavaScript `console` namespace, i.e. `console.log`.
#[wasm_bindgen(js_namespace = console)]
fn log(s: &str);
}

/// Format the arguments the way `format!` does and hand the text to `log`.
///
/// The expansion calls `log` unqualified, so a function with that name must be
/// in scope wherever the macro is invoked.
#[macro_export]
macro_rules! console_log {
    ($($arg:tt)*) => {
        log(&format!($($arg)*))
    };
}

// Register `wee_alloc` as the global allocator for this crate.
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;


pub struct JsPolarsError(PolarsError);

impl From<PolarsError> for JsPolarsError {
fn from(e: PolarsError) -> Self {
Self(e)
}
}

impl From<JsPolarsError> for JsValue {
fn from(e: JsPolarsError) -> Self {
format!("{:?}", e.0).into()
}
}
34 changes: 33 additions & 1 deletion js-polars/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,15 @@ use polars_core::prelude::{
};
use std::ops::{BitAnd, BitOr};
use wasm_bindgen::prelude::*;
use crate::{
console_log,
log,
};

#[wasm_bindgen]
#[repr(transparent)]
pub struct Series {
series: PSeries,
pub(crate) series: PSeries,
}

impl From<PSeries> for Series {
Expand All @@ -33,6 +38,33 @@ impl Series {
}
}

/// Render the series with its `Display` implementation.
///
/// Exported to JavaScript under the name `toString`.
#[wasm_bindgen(js_name = toString)]
pub fn to_string(&self) -> String {
    self.series.to_string()
}

/// Write the `Display` rendering of the series through the `console_log!` macro.
pub fn log(&self) {
    let rendered = self.series.to_string();
    console_log!("{}", rendered)
}

/// Build a short textual preview of the series, exported as `toJSON`.
///
/// The result has the shape `{ <name>: [v0, v1, ...] }` and contains at most
/// the first five values, each rendered with its `Display` implementation.
// NOTE(review): the name is emitted without quotes, so the string is not
// strict JSON — confirm whether consumers expect parseable output.
#[wasm_bindgen(js_name = toJSON)]
pub fn to_json(&self) -> String {
    // Preview only: cap the number of rendered values at five.
    let shown = std::cmp::min(self.series.len(), 5);
    let values = (0..shown)
        .map(|idx| self.series.get(idx).to_string())
        .collect::<Vec<String>>()
        .join(", ");

    format!("{{ {}: [{}] }}", self.series.name(), values)
}

pub fn rechunk(&mut self, in_place: bool) -> Option<Series> {
let series = self.series.rechunk();
if in_place {
Expand Down
13 changes: 0 additions & 13 deletions js-polars/tests/web.rs

This file was deleted.

6 changes: 4 additions & 2 deletions polars/polars-core/src/frame/hash_join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1241,7 +1241,8 @@ mod test {
+ df_a.column("b").unwrap().utf8().unwrap();
s.rename("dummy");

let df_a = df_a.with_column(s).unwrap();
let mut df_a = df_a.clone();
df_a.with_column(s).unwrap();
let mut s = df_b
.column("foo")
.unwrap()
Expand All @@ -1251,7 +1252,8 @@ mod test {
.unwrap()
+ df_b.column("bar").unwrap().utf8().unwrap();
s.rename("dummy");
let df_b = df_b.with_column(s).unwrap();
let mut df_b = df_b.clone();
df_b.with_column(s).unwrap();

let joined = df_a.left_join(&df_b, "dummy", "dummy").unwrap();
let ham_col = joined.column("ham").unwrap();
Expand Down
31 changes: 15 additions & 16 deletions polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,13 +421,17 @@ impl DataFrame {
self.insert_at_idx_no_name_check(index, series)
}

/// Add a new column to this `DataFrame`.
pub fn add_column<S: IntoSeries>(&mut self, column: S) -> Result<&mut Self> {
/// Add a new column to this `DataFrame` or replace an existing one.
pub fn with_column<S: IntoSeries>(&mut self, column: S) -> Result<&mut Self> {
let series = column.into_series();
self.has_column(series.name())?;
if series.len() == self.height() {
self.columns.push(series);
self.rechunk();
if series.len() == self.height() || self.is_empty() {
if self.has_column(series.name()).is_err() {
let name = series.name().to_string();
self.apply(&name, |_| series)?;
} else {
self.columns.push(series);
self.rechunk();
}
Ok(self)
} else {
Err(PolarsError::ShapeMisMatch(
Expand All @@ -441,13 +445,6 @@ impl DataFrame {
}
}

/// Create a new `DataFrame` with the column added.
pub fn with_column<S: IntoSeries>(&self, column: S) -> Result<Self> {
let mut df = self.clone();
df.add_column(column)?;
Ok(df)
}

/// Get a row in the `DataFrame` Beware this is slow.
///
/// # Example
Expand Down Expand Up @@ -760,7 +757,7 @@ impl DataFrame {
pub fn replace_or_add<S: IntoSeries>(&mut self, column: &str, new_col: S) -> Result<&mut Self> {
let new_col = new_col.into_series();
match self.replace(column, new_col.clone()) {
Err(_) => self.add_column(new_col),
Err(_) => self.with_column(new_col),
Ok(_) => Ok(self),
}
}
Expand Down Expand Up @@ -1737,8 +1734,10 @@ mod test {
"foo" => &[1, 2, 3]
}
.unwrap();
assert!(df.add_column(Series::new("foo", &[1, 2, 3])).is_err());
assert!(df.add_column(Series::new("bar", &[1, 2, 3])).is_ok());
// check if column is replaced
assert!(df.with_column(Series::new("foo", &[1, 2, 3])).is_ok());
assert!(df.with_column(Series::new("bar", &[1, 2, 3])).is_ok());
assert!(df.column("bar").is_ok())
}

#[test]
Expand Down
11 changes: 11 additions & 0 deletions py-polars/polars/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1133,6 +1133,17 @@ def join(

return wrap_df(out)

def with_column(self, column: "Series") -> "DataFrame":
    """
    Return a new DataFrame with the column added or replaced.

    Parameters
    ----------
    column
        Series, where the name of the Series refers to the column in the DataFrame.
    """
    new_df = self._df.with_column(column._s)
    return wrap_df(new_df)

def hstack(
self, columns: "Union[List[Series], DataFrame]", in_place=False
) -> Optional["DataFrame"]:
Expand Down
6 changes: 6 additions & 0 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,12 @@ impl PyDataFrame {
Ok(())
}

/// Return a new frame with `s` added, or with the column of the same name
/// replaced.
///
/// The receiver is only read: the inner frame is cloned and the clone is
/// modified, so a shared borrow is sufficient and no exclusive borrow of the
/// Python object is needed.
///
/// Any error reported by the inner `with_column` call is converted through
/// `PyPolarsEr` and returned.
pub fn with_column(&self, s: PySeries) -> PyResult<Self> {
    let mut df = self.df.clone();
    df.with_column(s.series).map_err(PyPolarsEr::from)?;
    Ok(df.into())
}

/// Get datatypes
pub fn dtypes(&self) -> Vec<u8> {
self.df
Expand Down

0 comments on commit a248b92

Please sign in to comment.