Skip to content

Commit

Permalink
feat(nodejs): extend in place, arr.join & str.split (#2554)
Browse files Browse the repository at this point in the history
* fix(nodejs): extend

* feat(nodejs): extend, arr.join & str.split
  • Loading branch information
universalmind303 committed Feb 6, 2022
1 parent 48ea1ed commit 9c0c31f
Show file tree
Hide file tree
Showing 17 changed files with 475 additions and 340 deletions.
57 changes: 57 additions & 0 deletions nodejs-polars/__tests__/expr.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,24 @@ describe("expr.str", () => {
expect(actual).toFrameEqual(expected);
expect(seriesActual).toSeriesEqual(expected.getColumn("isLinux"));
});
test("split", () => {
  // Comma-delimited strings, including one row with no delimiter at all.
  const input = pl.DataFrame({"a": ["ab,cd", "e,fg", "h"]});
  const want = pl.DataFrame({"split": [["ab", "cd"], ["e", "fg"], ["h"]]});

  // Expression API: split inside a select and alias the result.
  const splitExpr = col("a").str.split(",").as("split");
  const viaExpr = input.select(splitExpr);

  // Series API: split the column directly, then rename and lift to a frame.
  const splitSeries = input.getColumn("a").str.split(",");
  const viaSeries = splitSeries.rename("split").toFrame();

  expect(viaExpr).toFrameEqual(want);
  expect(viaSeries).toFrameEqual(want);
});
test("extract", () => {
const df = pl.DataFrame({
"a": [
Expand Down Expand Up @@ -1591,6 +1609,45 @@ describe("expr.lst", () => {
expect(actual).toFrameEqual(expected);
expect(actualFromSeries).toFrameEqual(expected);
});
test("join", () => {
  // List-of-string rows; with no separator argument the expected output
  // below has the elements joined by a comma.
  const input = pl.DataFrame({"a": [["ab", "cd"], ["e", "fg"], ["h"]]});
  const want = pl.DataFrame({"joinedString": ["ab,cd", "e,fg", "h"]});

  // Expression API
  const joinExpr = col("a").lst.join().as("joinedString");
  const viaExpr = input.select(joinExpr);

  // Series API
  const joinedSeries = input.getColumn("a").lst.join();
  const viaSeries = joinedSeries.rename("joinedString").toFrame();

  expect(viaExpr).toFrameEqual(want);
  expect(viaSeries).toFrameEqual(want);
});
test("join:separator", () => {
  // Same input as the plain "join" case, but with an explicit "|" separator.
  const input = pl.DataFrame({"a": [["ab", "cd"], ["e", "fg"], ["h"]]});
  const want = pl.DataFrame({"joinedString": ["ab|cd", "e|fg", "h"]});

  // Expression API
  const joinExpr = col("a").lst.join("|").as("joinedString");
  const viaExpr = input.select(joinExpr);

  // Series API
  const joinedSeries = input.getColumn("a").lst.join("|");
  const viaSeries = joinedSeries.rename("joinedString").toFrame();

  expect(viaExpr).toFrameEqual(want);
  expect(viaSeries).toFrameEqual(want);
});

test("last", () => {
const df = pl.DataFrame({"a": [[1, 10], [2, 12]]});
const expected = pl.DataFrame({"last": [10, 12]});
Expand Down
25 changes: 25 additions & 0 deletions nodejs-polars/polars/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,28 @@ export interface DataFrame extends Arithmetic<DataFrame> {
explode(column: ExprOrString): DataFrame
explode(columns: ExprOrString[]): DataFrame
explode(column: ExprOrString, ...columns: ExprOrString[]): DataFrame
/**
 * __Extend the memory backed by this `DataFrame` with the values from `other`.__
 * ___
 *
 * Different from `vstack`, which adds the chunks from `other` to the chunks of this `DataFrame`,
 * `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.
 *
 * If this does not cause a reallocation, the resulting data structure will not have any extra chunks
 * and thus will yield faster queries.
 *
 * Prefer `extend` over `vstack` when you want to do a query after a single append. For instance, during
 * online operations where you add `n` rows and rerun a query.
 *
 * Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance,
 * when you read in multiple files and want to store them in a single `DataFrame`.
 * In the latter case, finish the sequence of `vstack` operations with a `rechunk`.
 *
 * @param other DataFrame to vertically add.
 */
extend(other: DataFrame): DataFrame
/**
* Fill null/missing values by a filling strategy
*
Expand Down Expand Up @@ -1578,6 +1600,9 @@ export const dfWrapper = (_df: JsDataFrame): DataFrame => {
.explode(columns)
.collectSync({noOptimization:true});
},
/**
 * Delegates to the native `extend` operation through `wrap`, passing the
 * underlying `_df` handle of `other` as the `other` argument.
 */
extend(other) {
  const otherFrame = other._df;

  return wrap("extend", {other: otherFrame});
},
filter(predicate) {
return this
.lazy()
Expand Down
2 changes: 1 addition & 1 deletion nodejs-polars/polars/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import * as series from "./series/series";
import * as df from "./dataframe";
import { DataType } from "./datatypes";
import * as func from "./functions";
import io from "./io";
import * as io from "./io";
import * as cfg from "./cfg";
import {version as _version} from "../package.json";

Expand Down

0 comments on commit 9c0c31f

Please sign in to comment.