Skip to content

Commit

Permalink
Nodejs transpose & distinct (#2503)
Browse files Browse the repository at this point in the history
* wip: transpose impl

* wip: node transpose

* feat: transpose & distinct

* update rollingQuantile

* fix: rollingQuantile
  • Loading branch information
universalmind303 committed Jan 31, 2022
1 parent 494eaa3 commit bac9cf9
Show file tree
Hide file tree
Showing 17 changed files with 380 additions and 53 deletions.
62 changes: 62 additions & 0 deletions nodejs-polars/__tests__/dataframe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,68 @@ describe("dataframe", () => {
const expected = [9, 8];
expect(actual).toEqual(expected);
});
test("transpose", () => {
  // Two columns of three rows should come back as two rows spread over
  // three auto-named columns.
  const source = pl.DataFrame({
    a: [1, 2, 3],
    b: [1, 2, 3]
  });
  const transposed = source.transpose();

  const want = pl.DataFrame({
    "column_0": [1, 1],
    "column_1": [2, 2],
    "column_2": [3, 3]
  });
  expect(transposed).toFrameEqual(want);
});
test("transpose:includeHeader", () => {
  // With `includeHeader` the original column names ("a", "b") are expected
  // as a leading string column named "column".
  const source = pl.DataFrame({
    a: [1, 2, 3],
    b: [1, 2, 3]
  });
  const transposed = source.transpose({includeHeader:true});

  const want = pl.DataFrame({
    "column": ["a", "b"],
    "column_0": [1, 1],
    "column_1": [2, 2],
    "column_2": [3, 3]
  });
  expect(transposed).toFrameEqual(want);
});
test("transpose:columnNames", () => {
  const source = pl.DataFrame({
    a: [1, 2, 3],
    b: [1, 2, 3]
  });
  // A string is iterable character by character, so "abc" supplies the
  // three names "a", "b" and "c".
  const transposed = source.transpose({includeHeader:false, columnNames: "abc"});

  const want = pl.DataFrame({
    "a": [1, 1],
    "b": [2, 2],
    "c": [3, 3]
  });
  expect(transposed).toFrameEqual(want);
});
test("transpose:columnNames:generator", () => {
  // Endless name source: col_0, col_1, col_2, ...
  function *namesGenerator() {
    const baseName = "col_";
    for(let count = 0; ; count++) {
      yield `${baseName}${count}`;
    }
  }

  const source = pl.DataFrame({
    a: [1, 2, 3],
    b: [1, 2, 3]
  });
  // Pass the generator *object* (the result of calling the function).
  const transposed = source.transpose({includeHeader:false, columnNames: namesGenerator()});

  const want = pl.DataFrame({
    "col_0": [1, 1],
    "col_1": [2, 2],
    "col_2": [3, 3]
  });
  expect(transposed).toFrameEqual(want);
});
test("var", () => {
const actual = pl.DataFrame({
"foo": [1, 2, 3],
Expand Down
6 changes: 6 additions & 0 deletions nodejs-polars/__tests__/expr.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1903,7 +1903,13 @@ describe("rolling", () => {
.rollingQuantile({windowSize: 2, quantile: 0.5})
.prefix("rolling_quantile_")
);
const seriesActual = df
.getColumn("a")
.rollingQuantile({windowSize: 2, quantile: 0.5})
.rename("rolling_quantile_a");

expect(actual).toFrameStrictEqual(expected);
expect(seriesActual).toSeriesStrictEqual(expected["rolling_quantile_a"]);
});
test("rollingSkew", () => {
const df = pl.DataFrame({"a": [1, 2, 3, 3, 2, 10, 8]});
Expand Down
4 changes: 2 additions & 2 deletions nodejs-polars/__tests__/lazyframe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ describe("lazyframe", () => {
"bar": ["a", "b"]
}).lazy();
const actual = df.describePlan().replace(/\s+/g, " ");
expect(actual).toEqual(`MEMTABLE: ["foo", "bar"];
expect(actual).toEqual(`DATAFRAME(in-memory): ["foo", "bar"];
project */2 columns | details: None;
selection: "None" `.replace(/\s+/g, " "));
});
Expand All @@ -42,7 +42,7 @@ selection: "None" `.replace(/\s+/g, " "));
}).lazy();
const actual = df.describeOptimizedPlan().replace(/\s+/g, " ");
expect(actual).toEqual(
`MEMTABLE: ["foo", "bar"];
`DATAFRAME(in-memory): ["foo", "bar"];
project */2 columns | details: None;
selection: "None" `.replace(/\s+/g, " "));
});
Expand Down
134 changes: 128 additions & 6 deletions nodejs-polars/polars/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,16 @@ export interface DataFrame extends Arithmetic<DataFrame> {
* ```
*/
describe(): DataFrame

/**
* Drop duplicate rows from this DataFrame.
* Note that this fails if there is a column of type `List` in the DataFrame.
*
* Can be called either with positional arguments or with a single options
* object carrying the same three fields.
* @param maintainOrder
* @param subset - subset to drop duplicates for
* @param keep "first" | "last"
* @example
* ```
* >>> df.distinct(true, ["foo", "bar"], "last")
* >>> df.distinct({maintainOrder: true, subset: ["foo", "bar"], keep: "last"})
* ```
*/
distinct(maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first"| "last"): DataFrame
distinct(opts: {maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first"| "last"}): DataFrame
/**
* __Remove column from DataFrame and return as new.__
* ___
Expand Down Expand Up @@ -210,8 +220,10 @@ export interface DataFrame extends Arithmetic<DataFrame> {
* Note that this fails if there is a column of type `List` in the DataFrame.
* @param maintainOrder
* @param subset - subset to drop duplicates for
* @deprecated @since 0.2.1 @use {@link distinct}
*/
dropDuplicates(maintainOrder?: boolean, subset?: ColumnSelection): DataFrame
/** @deprecated @since 0.2.1 @use {@link distinct} */
dropDuplicates(opts: {maintainOrder?: boolean, subset?: ColumnSelection}): DataFrame
/**
* __Return a new DataFrame where the null values are dropped.__
Expand Down Expand Up @@ -1228,6 +1240,79 @@ export interface DataFrame extends Arithmetic<DataFrame> {
toSeries(index: number): Series<any>
toString(): string
/**
 * Transpose a DataFrame over the diagonal.
 *
 * @note This is a very expensive operation. Perhaps you can do it differently.
 * @param options
 * @param options.includeHeader If set, the column names will be added as first column.
 * @param options.headerName If `includeHeader` is set, this determines the name of the column that will be inserted
 * @param options.columnNames Optional iterable (array, string, generator object, ...) that yields column names.
 * Will be used to replace the columns in the DataFrame.
 * @returns The transposed DataFrame.
 *
 * @example
 * >>> df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
 * >>> df.transpose({includeHeader:true})
 * shape: (2, 4)
 * ┌────────┬──────────┬──────────┬──────────┐
 * │ column ┆ column_0 ┆ column_1 ┆ column_2 │
 * │ ---    ┆ ---      ┆ ---      ┆ ---      │
 * │ str    ┆ i64      ┆ i64      ┆ i64      │
 * ╞════════╪══════════╪══════════╪══════════╡
 * │ a      ┆ 1        ┆ 2        ┆ 3        │
 * ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
 * │ b      ┆ 1        ┆ 2        ┆ 3        │
 * └────────┴──────────┴──────────┴──────────┘
 * // replace the auto generated column names with a list
 * >>> df.transpose({includeHeader:false, columnNames:["a", "b", "c"]})
 * shape: (2, 3)
 * ┌─────┬─────┬─────┐
 * │ a   ┆ b   ┆ c   │
 * │ --- ┆ --- ┆ --- │
 * │ i64 ┆ i64 ┆ i64 │
 * ╞═════╪═════╪═════╡
 * │ 1   ┆ 2   ┆ 3   │
 * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
 * │ 1   ┆ 2   ┆ 3   │
 * └─────┴─────┴─────┘
 *
 * // Include the header as a separate column
 * >>> df.transpose({
 * ...   includeHeader:true,
 * ...   headerName:"foo",
 * ...   columnNames:["a", "b", "c"]
 * ... })
 * shape: (2, 4)
 * ┌─────┬─────┬─────┬─────┐
 * │ foo ┆ a   ┆ b   ┆ c   │
 * │ --- ┆ --- ┆ --- ┆ --- │
 * │ str ┆ i64 ┆ i64 ┆ i64 │
 * ╞═════╪═════╪═════╪═════╡
 * │ a   ┆ 1   ┆ 2   ┆ 3   │
 * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
 * │ b   ┆ 1   ┆ 2   ┆ 3   │
 * └─────┴─────┴─────┴─────┘
 *
 * // Replace the auto generated column with column names from a generator function.
 * // Note that the generator must be *called*: `columnNames` expects the iterable it returns.
 * >>> function *namesGenerator() {
 * ...   const baseName = "my_column_";
 * ...   let count = 0;
 * ...   while(true) {
 * ...     yield `${baseName}${count}`;
 * ...     count++;
 * ...   }
 * ... }
 * >>> df.transpose({includeHeader:false, columnNames:namesGenerator()})
 * shape: (2, 3)
 * ┌─────────────┬─────────────┬─────────────┐
 * │ my_column_0 ┆ my_column_1 ┆ my_column_2 │
 * │ ---         ┆ ---         ┆ ---         │
 * │ i64         ┆ i64         ┆ i64         │
 * ╞═════════════╪═════════════╪═════════════╡
 * │ 1           ┆ 2           ┆ 3           │
 * ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
 * │ 1           ┆ 2           ┆ 3           │
 * └─────────────┴─────────────┴─────────────┘
 */
transpose(options?: {includeHeader?: boolean, headerName?: string, columnNames?: Iterable<string>}): DataFrame
/**
* Aggregate the columns of this DataFrame to their variance value.
* @example
* ```
Expand Down Expand Up @@ -1468,14 +1553,24 @@ export const dfWrapper = (_df: JsDataFrame): DataFrame => {
return wrap("drop_nulls");
}
},
dropDuplicates(opts: any=false, subset?) {
const maintainOrder = opts?.maintainOrder ?? opts;
subset = opts?.subset ?? subset;
if(typeof subset! === "string") {
subset = [subset];
distinct(opts: any = false, subset?, keep = "first") {
const defaultOptions = {
maintainOrder: false,
keep,
};

if(typeof opts === "boolean") {
return wrap("distinct", {...defaultOptions, maintainOrder: opts, subset, keep});
}

return wrap("drop_duplicates", {maintainOrder, subset});
if(opts.subset) {
opts.subset = [opts.subset].flat(3);
}

return wrap("distinct", {...defaultOptions, ...opts});
},
dropDuplicates(opts: any = false, subset?) {
  // Legacy entry point: hands both arguments to `distinct` unchanged.
  const deduplicated = this.distinct(opts, subset);

  return deduplicated;
},
explode(...columns) {
return dfWrapper(_df)
Expand Down Expand Up @@ -1764,6 +1859,33 @@ export const dfWrapper = (_df: JsDataFrame): DataFrame => {
},
toSeries: (index) => seriesWrapper(unwrap("select_at_idx", {index})),
toString: () => noArgUnwrap<any>("as_str")().toString(),
/**
 * Transpose the frame and, when `options.columnNames` is supplied, rename
 * the resulting columns from that iterable.
 *
 * Renaming rules:
 * - When `options.includeHeader` is set, the first column is the header
 *   column. It is named `options.headerName` if given; otherwise it keeps
 *   the name it already has after the transpose.
 * - Every other column takes the next value from `options.columnNames`.
 *   If the iterable runs out early, the remaining columns keep their
 *   existing names instead of being renamed to `undefined`.
 *
 * @param options - forwarded as-is to the underlying "transpose" call
 * @returns the transposed (and possibly renamed) DataFrame
 */
transpose(options?) {
  const df = wrap("transpose", options);
  const columnNames = options?.columnNames;
  if(!columnNames) {
    return df;
  }

  const supplied = columnNames[Symbol.iterator]();
  // The header column, when present, sits at index 0 and is not fed from `columnNames`.
  const headerOffset = options.includeHeader ? 1 : 0;

  df.columns = (df.columns as string[]).map((current, idx) => {
    if(idx < headerOffset) {
      return options.headerName ?? current;
    }
    const step = supplied.next();

    // An exhausted iterator reports `done`; fall back to the existing name.
    return step.done ? current : step.value;
  });

  return df;
},
var: noArgWrap("var"),
map: (fn) => map(dfWrapper(_df), fn as any) as any,
row: (index) => unwrap("to_row", {idx: index}),
Expand Down
33 changes: 27 additions & 6 deletions nodejs-polars/polars/lazy/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,19 @@ export interface LazyDataFrame {
/**
* Drop duplicate rows from this DataFrame.
* Note that this fails if there is a column of type `List` in the DataFrame.
*
* Can be called either with positional arguments or with a single options
* object carrying the same three fields.
* @param maintainOrder
* @param subset - subset to drop duplicates for
* @param keep "first" | "last"
*/
distinct(maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first" | "last"): LazyDataFrame
distinct(opts: {maintainOrder?: boolean, subset?: ColumnSelection, keep?: "first" | "last"}): LazyDataFrame
/**
* Drop duplicate rows from this DataFrame.
* Note that this fails if there is a column of type `List` in the DataFrame.
* @deprecated @since 0.2.1 @use {@link distinct}
*/
dropDuplicates(opts: {maintainOrder?: boolean, subset?: ColumnSelection}): LazyDataFrame
/** @deprecated @since 0.2.1 @use {@link distinct} */
dropDuplicates(maintainOrder?: boolean, subset?: ColumnSelection): LazyDataFrame
/**
* Drop rows with null values from this DataFrame.
Expand Down Expand Up @@ -300,14 +311,24 @@ export const LazyDataFrame = (ldf: JsLazyFrame): LazyDataFrame => {
drop(...cols) {
return wrap("dropColumns", {cols: cols.flat(2)});
},
dropDuplicates(opts: any=false, subset?) {
const maintainOrder = opts?.maintainOrder ?? opts;
subset = opts?.subset ?? subset;
if(typeof subset! === "string") {
subset = [subset];
distinct(opts: any = false, subset?, keep = "first") {
const defaultOptions = {
maintainOrder: false,
keep: "first",
};

if(typeof opts === "boolean") {
return wrap("distinct", {...defaultOptions, maintainOrder: opts, subset, keep});
}

if(opts.subset) {
opts.subset = [opts.subset].flat(3);
}

return wrap("dropDuplicates", {maintainOrder, subset});
return wrap("distinct", {...defaultOptions, ...opts});
},
dropDuplicates(opts, subset?) {
  // Legacy entry point: hands both arguments to `distinct` unchanged.
  const deduplicated = this.distinct(opts, subset);

  return deduplicated;
},
dropNulls(...subset) {
if(subset.length) {
Expand Down
15 changes: 14 additions & 1 deletion nodejs-polars/polars/lazy/expr.ts
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,20 @@ const _Expr = (_expr: any): Expr => {
rollingStd: rolling("rollingStd"),
rollingVar: rolling("rollingVar"),
rollingMedian: rolling("rollingMedian"),
rollingQuantile: wrapBinary("rollingQuantile", "windowSize", "quantile"),
rollingQuantile(val, interpolation?, windowSize?, weights?, minPeriods?, center?) {
  // Anything that is not a number is forwarded untouched as the options object.
  if(typeof val !== "number") {
    return wrap("rollingQuantile", val);
  }

  // Positional form: the first argument is the quantile itself.
  const positionalOptions = {
    quantile: val,
    interpolation,
    windowSize,
    weights,
    minPeriods,
    center
  };

  return wrap("rollingQuantile", positionalOptions);
},
rollingSkew(val, bias=true) {
if(typeof val === "number") {
return wrap("rollingSkew", {windowSize: val, bias});
Expand Down
18 changes: 13 additions & 5 deletions nodejs-polars/polars/series/series.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1399,11 +1399,19 @@ export const seriesWrapper = <T>(_s: JsSeries): Series<T> => {
rollingStd: rolling("rolling_std"),
rollingVar: rolling("rolling_var"),
rollingMedian: rolling("rollingMedian"),
rollingQuantile(windowSize, quantile?) {
return this
.toFrame()
.select(col(this.name).rollingQuantile(windowSize, quantile))
.getColumn(this.name);
rollingQuantile(val, interpolation?, windowSize?, weights?, minPeriods?, center?) {
  // Anything that is not a number is forwarded untouched as the options object.
  if(typeof val !== "number") {
    return wrap("rolling_quantile", val);
  }

  // Positional form: the first argument is the quantile itself.
  const positionalOptions = {
    quantile: val,
    interpolation,
    windowSize,
    weights,
    minPeriods,
    center
  };

  return wrap("rolling_quantile", positionalOptions);
},
rollingSkew(windowSize, bias?) {
return this
Expand Down

0 comments on commit bac9cf9

Please sign in to comment.