Skip to content

Commit

Permalink
Nodejs: readRow enhancements (#2701)
Browse files Browse the repository at this point in the history
* wip: infer rows

* wip: infer rows

* wip: infer rows

* wip: infer rows

* resolving diffs

* code cleanup

* feat: add missing values support for row reader

* chore: code cleanup

* chore: code cleanup

* chore: code cleanup

* chore: code cleanup

* chore: fix whitespace

* chore: fix whitespace

* wip

* generic row infer

* chore: code cleanup

* chore: code cleanup

* chore: code cleanup

* resolve merges for "Categorical"

* chore: code cleanup
  • Loading branch information
universalmind303 committed Feb 23, 2022
1 parent b7b0adc commit 75ef2ce
Show file tree
Hide file tree
Showing 16 changed files with 1,044 additions and 660 deletions.
57 changes: 41 additions & 16 deletions nodejs-polars/__tests__/dataframe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1368,9 +1368,9 @@ describe("io", () => {
});
test("toJSON:multiline", () => {
const rows = [
{foo: 1.1, bar: 6.2, ham: "a"},
{foo: 3.1, bar: 9.2, ham: "b"},
{foo: 3.1, bar: 9.2, ham: "c"}
{foo: 1.1},
{foo: 3.1},
{foo: 3.1}
];
const actual = pl.DataFrame(rows).toJSON({multiline:true});
const expected = rows.map(r => JSON.stringify(r)).join("\n").concat("\n");
Expand Down Expand Up @@ -1408,9 +1408,9 @@ describe("io", () => {
});
test("JSON.stringify(df)", () => {
const rows = [
{foo: 1.1, bar: 6.2, ham: "a"},
{foo: 3.1, bar: 9.2, ham: "b"},
{foo: 3.1, bar: 9.2, ham: "c"}
{foo: 1.1},
{foo: 3.1},
{foo: 3.1}
];
const df = pl.DataFrame(rows);
const expected = pl.DataFrame(rows).toJSON();
Expand All @@ -1419,9 +1419,9 @@ describe("io", () => {
});
test("toJSON:rows", () => {
const rows = [
{foo: 1.1, bar: 6.2, ham: "a"},
{foo: 3.1, bar: 9.2, ham: "b"},
{foo: 3.1, bar: 9.2, ham: "c"}
{foo: 1.1},
{foo: 3.1},
{foo: 3.1}
];
const expected = JSON.stringify(rows);
const actual = pl.DataFrame(rows).toJSON({orient:"row"});
Expand Down Expand Up @@ -1582,18 +1582,42 @@ describe("create", () => {
expect(df.row(1)).toEqual(rows[1]);
expect(df.columns).toEqual(expectedColumns);
});
test("from row objects", () => {
test("from row objects, inferred schema", () => {
const rows = [
{"num": 1, "date": new Date(Date.now()), "string": "foo1"},
{"num": 1, "date": new Date(Date.now()), "string": "foo2"}
{"num": 1, "date": new Date(Date.now()), "string": 1}
];

const df = pl.DataFrame(rows);
expect(df.row(0)).toEqual(Object.values(rows[0]));
expect(df.row(1)).toEqual(Object.values(rows[1]));
expect(df.columns).toEqual(Object.keys(rows[0]));
expect(df.dtypes).toEqual(["Float64", "Datetime", "Utf8"]);
const expected = [
rows[0],
{num: 1, date: rows[1].date, string: rows[1].string.toString()}
];

const df = pl.DataFrame(rows, {inferSchemaLength: 1});
expect(df.toObject({orient: "row"})).toEqual(expected);
expect(df.dtypes.sort()).toEqual(["Datetime", "Float64", "Utf8"]);
});
test("from row objects, with schema", () => {
  // Explicit column dtypes handed to the constructor; note that `date` is
  // requested as Utf8 even though the input rows carry Date objects.
  const schema = {
    num: "Int32",
    date: "Utf8",
    string: "Utf8"
  };

  // The second row deliberately has no "string" key.
  const firstRow = {"num": 1, "date": new Date(Date.now()), "string": "foo1"};
  const secondRow = {"num": 1, "date": new Date(Date.now())};
  const rows = [firstRow, secondRow];

  // Dates are expected back as their string form, and the missing
  // "string" value is expected to surface as null.
  const expected = [
    {num: 1, date: firstRow.date.toString(), string: "foo1"},
    {num: 1, date: secondRow.date.toString(), string: null}
  ];

  const frame = pl.DataFrame(rows, {schema});
  const actualRows = frame.toObject({orient: "row"});
  expect(actualRows).toEqual(expected);

  const actualSchema = frame.schema();
  expect(actualSchema).toEqual(schema);
});

test("from nulls", () => {
const df = pl.DataFrame({"nulls": [null, null, null]});
const expected = pl.DataFrame([pl.Series("nulls", [null, null, null], pl.Float64)]);
Expand Down Expand Up @@ -1908,6 +1932,7 @@ describe("meta", () => {
const actual = df[0][0];
expect(actual).toStrictEqual(df.getColumn("os").get(0));
});

test("proxy:has", () => {
const df = pl.DataFrame({
os: ["apple", "linux"],
Expand Down
58 changes: 22 additions & 36 deletions nodejs-polars/__tests__/groupby.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ describe("groupby", () => {

const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_agg_list": [[1, 3], [3, 7], [5]],
"bar_agg_list": [[2, 4], [4, 8], [6]]
"foo": [[1, 3], [3, 7], [5]],
"bar": [[2, 4], [4, 8], [6]]
});
expect(actual).toFrameEqual(expected);
});
Expand All @@ -31,7 +31,7 @@ describe("groupby", () => {
.sort("name");
const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_min": [1, 3, 5]
"foo": [1, 3, 5]
});
expect(actual).toFrameEqual(expected);
});
Expand All @@ -43,26 +43,12 @@ describe("groupby", () => {
.sort("name");
const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_min": [1, 3, 5],
"bar_sum": [6, 12, 6]
});
expect(actual).toFrameEqual(expected);
});
test("agg:columns:multi_agg", () => {
const actual = df.groupBy("name").agg({
"foo": ["min", "first", "last"],
"bar": "sum"
})
.sort("name");
const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_min": [1, 3, 5],
"foo_first": [1, 3, 5],
"foo_last": [3, 7, 5],
"bar_sum": [6, 12, 6]
"foo": [1, 3, 5],
"bar": [6, 12, 6]
});
expect(actual).toFrameEqual(expected);
});

test("count", () => {
const actual = df.groupBy("name").count()
.sort("name");
Expand All @@ -78,8 +64,8 @@ describe("groupby", () => {
.sort("name");
const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_first": [1, 3, 5],
"bar_first": [2, 4, 6]
"foo": [1, 3, 5],
"bar": [2, 4, 6]
});
expect(actual).toFrameEqual(expected);
});
Expand All @@ -103,8 +89,8 @@ describe("groupby", () => {

const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_last": [3, 7, 5],
"bar_last": [4, 8, 6]
"foo": [3, 7, 5],
"bar": [4, 8, 6]
});
expect(actual).toFrameEqual(expected);
});
Expand All @@ -127,8 +113,8 @@ describe("groupby", () => {
.sort("name");
const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_max": [3, 7, 5],
"bar_max": [4, 8, 6]
"foo": [3, 7, 5],
"bar": [4, 8, 6]
});
expect(actual).toFrameEqual(expected);
});
Expand All @@ -139,8 +125,8 @@ describe("groupby", () => {
.sort("name");
const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_mean": [2, 5, 5],
"bar_mean": [3, 6, 6]
"foo": [2, 5, 5],
"bar": [3, 6, 6]
});
expect(actual).toFrameEqual(expected);
});
Expand All @@ -151,8 +137,8 @@ describe("groupby", () => {
.sort("name");
const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_median": [2, 5, 5],
"bar_median": [3, 6, 6]
"foo": [2, 5, 5],
"bar": [3, 6, 6]
});
expect(actual).toFrameEqual(expected);
});
Expand All @@ -163,8 +149,8 @@ describe("groupby", () => {
.sort("name");
const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_min": [1, 3, 5],
"bar_min": [2, 4, 6]
"foo": [1, 3, 5],
"bar": [2, 4, 6]
});
expect(actual).toFrameEqual(expected);
});
Expand All @@ -175,8 +161,8 @@ describe("groupby", () => {
.sort("name");
const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_n_unique": [2, 2, 1],
"bar_n_unique": [2, 2, 1]
"foo": [2, 2, 1],
"bar": [2, 2, 1]
});
expect(actual).toFrameEqual(expected);
});
Expand All @@ -187,8 +173,8 @@ describe("groupby", () => {
.sort("name");
const expected = pl.DataFrame({
"name": ["a", "b", "c"],
"foo_sum": [4, 10, 5],
"bar_sum": [6, 12, 6]
"foo": [4, 10, 5],
"bar": [6, 12, 6]
});
expect(actual).toFrameEqual(expected);
});
Expand Down
17 changes: 8 additions & 9 deletions nodejs-polars/polars/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2000,15 +2000,16 @@ export const dfWrapper = (_df: JsDataFrame): DataFrame => {
};
}
});

};

/**
 * Callable shape of the `DataFrame` factory: a set of call signatures that
 * all return a `DataFrame`, plus the static `isDataFrame` type guard.
 */
export interface DataFrameConstructor {
/** Call with no arguments. */
(): DataFrame
/** Call with an array of string-keyed records (row objects). */
(data: Record<string, any>[]): DataFrame
/** Call with an array of `Series`. */
(data: Series<any>[]): DataFrame
/** Call with any other single data argument. */
(data: any): DataFrame
/** Call with an array of arrays plus explicit column names and/or orientation. */
(data: any[][], options: {columns?: any[], orient?: "row" | "col"}): DataFrame
/**
 * Call with any data plus an optional options bag.
 * NOTE(review): the option descriptions below are inferred from the option
 * names and the accompanying tests — confirm against the native `read_rows`
 * implementation.
 */
(data: any, options?: {
// column names to apply to the resulting frame
columns?: any[],
// whether `data` is laid out row-wise or column-wise
orient?: "row" | "col",
// column name -> dtype (dtype name string or `DataType`); presumably overrides inference
schema?: Record<string, string | DataType>,
// presumably the number of leading rows inspected when inferring a schema — TODO confirm
inferSchemaLength?: number,
}): DataFrame
/** Type guard that narrows `arg` to `DataFrame`. */
isDataFrame(arg: any): arg is DataFrame;
}
function DataFrameConstructor(data?, options?): DataFrame {
Expand All @@ -2018,7 +2019,7 @@ function DataFrameConstructor(data?, options?): DataFrame {
}

if (Array.isArray(data)) {
return dfWrapper(arrayToJsDataFrame(data, options?.columns, options?.orient));
return dfWrapper(arrayToJsDataFrame(data, options));
}

return dfWrapper(objToDF(data as any));
Expand All @@ -2036,8 +2037,6 @@ function objToDF(obj: Record<string, Array<any>>): any {
return pli.df.read_columns({columns});
}
/**
 * Type guard for `DataFrame`: reports whether the value's `_df` property
 * passes `isExternal`. The optional chaining makes `null`/`undefined` inputs
 * evaluate `isExternal(undefined)` instead of throwing on property access.
 */
const isDataFrame = (ty: any): ty is DataFrame => isExternal(ty?._df);
export namespace pl {
export const DataFrame: DataFrameConstructor = Object.assign(DataFrameConstructor, {isDataFrame});
}


export const DataFrame: DataFrameConstructor = Object.assign(DataFrameConstructor, {isDataFrame});
1 change: 1 addition & 0 deletions nodejs-polars/polars/datatypes.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import {jsTypeToPolarsType} from "@polars/internals/construction";
import pli from "./internals/polars_internal";

export type DtypeToPrimitive<T> = T extends DataType.Bool ? boolean :
Expand Down
18 changes: 7 additions & 11 deletions nodejs-polars/polars/internals/construction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,11 @@ export function arrayToJsSeries(name: string, values: any[], dtype?: any, strict
return series;
}

export function arrayToJsDataFrame(data: any[], columns?: string[], orient?: "col"| "row", typedArrays?: boolean): any {
export function arrayToJsDataFrame(data: any[], options?): any {
let columns = options?.columns;
let orient = options?.orient;


let dataSeries;

if(!data.length) {
Expand All @@ -141,7 +145,7 @@ export function arrayToJsDataFrame(data: any[], columns?: string[], orient?: "co
});
}
else if(data[0].constructor.name === "Object") {
const df = pli.df.read_rows({rows: data});
const df = pli.df.read_rows({rows: data, ...options});

if(columns) {
pli.df.set_column_names({_df: df, names: columns});
Expand All @@ -160,17 +164,9 @@ export function arrayToJsDataFrame(data: any[], columns?: string[], orient?: "co

return df;
} else {
if(typedArrays) {

dataSeries = data.map((s, idx) => Series.from(s)
.as(`column_${idx}`)
.inner()
);
dataSeries = data.map((s, idx) => Series(`column_${idx}`, s).inner());

} else {

dataSeries = data.map((s, idx) => Series(`column_${idx}`, s).inner());
}
}

}
Expand Down
15 changes: 12 additions & 3 deletions nodejs-polars/polars/lazy/expr.ts
Original file line number Diff line number Diff line change
Expand Up @@ -491,8 +491,11 @@ export interface Expr extends
take({index}: {index: Expr | number[] | Series<number>}): Expr
/** Take every nth value in the Series and return as a new Series. */
takeEvery(n: number): Expr
/** Get the unique/distinct values in the list */
unique(): Expr
/**
* Get the unique/distinct values in the list
* @param maintainOrder Maintain order of data. This requires more work.
*/
unique(maintainOrder?: boolean | {maintainOrder: boolean}): Expr
/** Returns a unit Series with the highest value possible for the dtype of this expression. */
upperBound(): Expr
/** Get variance. */
Expand Down Expand Up @@ -761,7 +764,13 @@ const _Expr = (_expr: any): Expr => {
return wrap("take", {other: indices._expr});
},
takeEvery: wrapUnary("takeEvery", "n"),
unique: wrapNullArgs("unique"),
/**
 * Get the unique/distinct values of this expression.
 *
 * @param opt Either a boolean, or an options object of the form
 *   `{maintainOrder: boolean}`. When order must be maintained the
 *   `"unique_stable"` method is dispatched, otherwise `"unique"`.
 */
unique(opt?) {
  // An options object is always truthy, so a plain `if (opt)` would treat
  // `{maintainOrder: false}` as a request for the stable variant. Read the
  // flag out of the object form explicitly; every other value keeps the
  // original truthiness semantics.
  const maintainOrder = typeof opt === "object" && opt !== null
    ? Boolean(opt.maintainOrder)
    : Boolean(opt);

  return wrap(maintainOrder ? "unique_stable" : "unique");
},
upperBound: wrapNullArgs("upperBound"),
where: wrapExprArg("filter"),
var: wrapNullArgs("var"),
Expand Down
15 changes: 11 additions & 4 deletions nodejs-polars/polars/series/series.ts
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,7 @@ export interface Series<T> extends
/**
* __Get unique elements in series.__
* ___
* @param maintainOrder Maintain order of data. This requires more work.
* @example
* ```
* s = pl.Series("a", [1, 2, 2, 3])
Expand All @@ -933,8 +934,8 @@ export interface Series<T> extends
* ]
* ```
*/
unique(): Series<T>
/**
unique(maintainOrder?: boolean | {maintainOrder: boolean}): Series<T>
/**
* __Count the unique values in a Series.__
* ___
* @example
Expand Down Expand Up @@ -991,7 +992,7 @@ export interface Series<T> extends
* @example
* ```
* const s = pl.Series("foo", [1,2,3])
* s.toJS()
* s.toObject()
* {
* name: "foo",
* datatype: "Float64",
Expand Down Expand Up @@ -1571,7 +1572,13 @@ export const seriesWrapper = <T>(_s: JsSeries): Series<T> => {
return unwrap<Buffer>("to_json").toString();
},
toObject: noArgUnwrap("to_js"),
unique: noArgWrap("unique"),
/**
 * Get the unique elements of this series.
 *
 * @param opt Either a boolean, or an options object of the form
 *   `{maintainOrder: boolean}`. When order must be maintained the
 *   `"unique_stable"` method is dispatched, otherwise `"unique"`.
 */
unique(opt?) {
  // An options object is always truthy, so a plain `if (opt)` would treat
  // `{maintainOrder: false}` as a request for the stable variant. Read the
  // flag out of the object form explicitly; every other value keeps the
  // original truthiness semantics.
  const maintainOrder = typeof opt === "object" && opt !== null
    ? Boolean(opt.maintainOrder)
    : Boolean(opt);

  return wrap(maintainOrder ? "unique_stable" : "unique");
},
valueCounts() {
return dfWrapper(unwrap("value_counts"));
},
Expand Down

0 comments on commit 75ef2ce

Please sign in to comment.