Skip to content

Commit

Permalink
node updates (#3984)
Browse files Browse the repository at this point in the history
* add join_asof & pivot

* refactor(nodejs-dtypes): big refactor on datatypes for full recursive support

* working on some serde issues

* update parquet reader options & fix lazy serde

* deterministic row infer schema

* update test-js.yaml

* let's try the "either" again

* code cleanup
  • Loading branch information
universalmind303 committed Jul 18, 2022
1 parent 36c7da7 commit f94b29e
Show file tree
Hide file tree
Showing 30 changed files with 1,956 additions and 821 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test-js.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
strategy:
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
node: ["16", "17"]
node: ["16", "17", "18"]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v2
Expand All @@ -24,7 +24,7 @@ jobs:
- name: Install latest Rust nightly
uses: actions-rs/toolchain@v1
with:
toolchain: nightly-2022-06-22
toolchain: nightly-2022-07-03
override: true
components: rustfmt, clippy
- run: yarn --version
Expand Down
2 changes: 1 addition & 1 deletion nodejs-polars/@types/jest.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ declare global {
interface Matchers<R> {
toSeriesEqual(b: Series<any>): R;
toSeriesStrictEqual(b: Series<any>): R;
toFrameEqual(b: DataFrame): R;
toFrameEqual(b: DataFrame, nullEqual?: boolean): R;
/**
* Compares two DataFrames, including the dtypes
*
Expand Down
1 change: 1 addition & 0 deletions nodejs-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ features = [
"ipc",
"avro",
"list_eval",
"arg_where",
]
path = "../polars"

Expand Down
45 changes: 41 additions & 4 deletions nodejs-polars/__tests__/dataframe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,25 @@ describe("dataframe", () => {
]);
expect(actual).toFrameEqual(expected);
});
test("pivot", () => {
  // Fresh dtype per call so every column gets its own List(Int32) descriptor.
  const int32List = () => pl.List(pl.Int32);

  // Input: an Int32 key column "a" and a List(Int32) value column "b".
  const input = pl.DataFrame({
    "a": pl.Series([1, 2, 3]).cast(pl.Int32),
    "b": pl.Series([[1, 1], [2, 2], [3, 3]]).cast(int32List()),
  });

  // Column "a" is used both as the index and as the pivot column, so the
  // expected frame has one column per distinct value of "a", with nulls
  // everywhere except the matching row.
  const wanted = pl
    .DataFrame({
      "a": pl.Series([1, 2, 3]).cast(pl.Int32),
      "1": pl.Series([[1, 1], null, null]).cast(int32List()),
      "2": pl.Series([null, [2, 2], null]).cast(int32List()),
      "3": pl.Series([null, null, [3, 3]]).cast(int32List()),
    })
    .select("a", "1", "2", "3");

  const pivoted = input.pivot("b", {
    index: "a",
    columns: "a",
    aggregateFunc: "first",
    sortColumns: true,
  });

  // Second argument asks the matcher to treat null cells as equal.
  expect(pivoted).toFrameEqual(wanted, true);
});
});
describe("join", () => {
test("on", () => {
Expand Down Expand Up @@ -1268,6 +1287,24 @@ describe("join", () => {
});
expect(actual).toFrameEqual(expected);
});
test("asof_cross_join", () => {
  // Smoke test: checks only that asof and cross joins dispatch correctly on
  // both the eager and the lazy API, by asserting the shape of each result.
  const left = pl.DataFrame({"a": [-10, 5, 10], "left_val": ["a", "b", "c"]});
  const right = pl.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]});

  // only test dispatch of asof join
  let out = left.joinAsof(right, {on: "a"});
  expect(out.shape).toEqual({height: 3, width: 3});

  out = left.lazy().joinAsof(right.lazy(), {on: "a"}).collectSync();
  expect(out.shape).toEqual({height: 3, width: 3});

  // only test dispatch of cross join
  out = left.join(right, {how: "cross"});
  expect(out.shape).toEqual({height: 15, width: 4});

  // Capture the lazy result so the assertion below checks the lazy cross
  // join itself rather than re-checking the eager frame from the step above.
  out = left.lazy().join(right.lazy(), {how: "cross"}).collectSync();
  expect(out.shape).toEqual({height: 15, width: 4});
});
});
describe("io", () => {
const df = pl.DataFrame([
Expand Down Expand Up @@ -1458,13 +1495,13 @@ describe("create", () => {
bool: pl.Bool,
date: pl.Date,
date_nulls: pl.Date,
datetime: pl.Datetime,
datetime_nulls: pl.Datetime,
datetime: pl.Datetime("ms"),
datetime_nulls: pl.Datetime("ms"),
string: pl.Utf8,
string_nulls: pl.Utf8,
categorical: pl.Categorical,
categorical_nulls: pl.Categorical,
list: pl.List,
list: pl.List(pl.Float64),
float_64: pl.Float64,
float_64_nulls: pl.Float64,
uint_64: pl.UInt64,
Expand All @@ -1485,7 +1522,7 @@ describe("create", () => {
});
test("from series-array", () => {
const s1 = pl.Series("num", [1, 2, 3]);
const s2 = pl.Series("date", [null, Date.now(), Date.now()], pl.Datetime);
const s2 = pl.Series("date", [null, Date.now(), Date.now()], pl.Datetime("ms"));
const df = pl.DataFrame([s1, s2]);
expect(df.getColumn("num")).toSeriesEqual(s1);
expect(df.getColumn("date")).toSeriesEqual(s2);
Expand Down
122 changes: 122 additions & 0 deletions nodejs-polars/__tests__/datelike.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import pl from "@polars";
describe("datelike", () => {
test("asof join", () => {
  // strptime pattern shared by both frames: date, time, 3 fractional digits.
  const fmt = "%F %T%.3f";

  const quoteStamps = [
    "2016-05-25 13:30:00.023",
    "2016-05-25 13:30:00.023",
    "2016-05-25 13:30:00.030",
    "2016-05-25 13:30:00.041",
    "2016-05-25 13:30:00.048",
    "2016-05-25 13:30:00.049",
    "2016-05-25 13:30:00.072",
    "2016-05-25 13:30:00.075"
  ];
  const quotes = pl.DataFrame({
    dates: pl.Series(quoteStamps).str.strptime(pl.Datetime("ms"), fmt),
    ticker: ["GOOG", "MSFT", "MSFT", "MSFT", "GOOG", "AAPL", "GOOG", "MSFT"],
    bid: [720.5, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
  });

  const tradeStamps = [
    "2016-05-25 13:30:00.023",
    "2016-05-25 13:30:00.038",
    "2016-05-25 13:30:00.048",
    "2016-05-25 13:30:00.048",
    "2016-05-25 13:30:00.048"
  ];
  const trades = pl.DataFrame({
    dates: pl.Series(tradeStamps).str.strptime(pl.Datetime("ms"), fmt),
    ticker: ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
    bid: [51.95, 51.95, 720.77, 720.92, 98.0],
  });

  // Default options: join on "dates" only; clashing right-hand columns get
  // a "_right" suffix.
  const joined = trades.joinAsof(quotes, {on: "dates"});
  expect(joined.columns).toEqual(["dates", "ticker", "bid", "ticker_right", "bid_right"]);

  // NOTE(review): the column is cast to Float64 and divided by 1000, yet the
  // expected values look like epoch milliseconds. That only holds if the cast
  // yields microseconds -- confirm which time unit strptime produces here.
  const joinedDates = joined
    .getColumn("dates")
    .cast(pl.Float64)
    .div(1000)
    .toArray();
  expect(joinedDates).toEqual([
    1464183000023,
    1464183000038,
    1464183000048,
    1464183000048,
    1464183000048,
  ]);

  // strategy: "forward"
  const forwardBids = trades
    .joinAsof(quotes, {on: "dates", strategy: "forward"})
    .getColumn("bid_right")
    .toArray();
  expect(forwardBids).toEqual([720.5, 51.99, 720.5, 720.5, 720.5]);

  // Grouped by ticker, trades on the left.
  const tradesByTicker = trades.joinAsof(quotes, {on: "dates", by: "ticker"});
  expect(tradesByTicker.getColumn("bid_right").toArray()).toEqual([51.95, 51.97, 720.5, 720.5, null]);

  // Grouped by ticker, quotes on the left.
  const quotesByTicker = quotes.joinAsof(trades, {on: "dates", by: "ticker"});
  expect(quotesByTicker.getColumn("bid_right").toArray()).toEqual([
    null,
    51.95,
    51.95,
    51.95,
    720.92,
    98.0,
    720.92,
    51.95,
  ]);

  // Backward strategy with a 5ms tolerance; column read via bracket access.
  const backwardWithin5ms = quotes.joinAsof(trades, {on: "dates", strategy: "backward", tolerance: "5ms"})[
    "bid_right"
  ].toArray();
  expect(backwardWithin5ms).toEqual([51.95, 51.95, null, 51.95, 98.0, 98.0, null, null]);

  // Forward strategy with a 5ms tolerance; column read via bracket access.
  const forwardWithin5ms = quotes.joinAsof(trades, {on: "dates", strategy: "forward", tolerance: "5ms"})[
    "bid_right"
  ].toArray();
  expect(forwardWithin5ms).toEqual([51.95, 51.95, null, null, 720.77, null, null, null]);
});
test("asofjoin tolerance grouper", () => {
  // Date's month argument is zero-based, so these are days in February 2020
  // (local time).
  const dayOf = (dayOfMonth: number) => new Date(2020, 1, dayOfMonth);

  const left = pl.DataFrame({
    "date": [dayOf(5), dayOf(10)],
    "by": [1, 1],
  });
  const right = pl.DataFrame({
    "date": [dayOf(5), dayOf(6)],
    "by": [1, 1],
    "values": [100, 200],
  });

  // Expected: the row dated the 5th picks up 100; the row dated the 10th has
  // no match and gets null.
  const wanted = pl.DataFrame({
    "date": [dayOf(5), dayOf(10)],
    "by": [1, 1],
    "values": [100, null],
  });

  const joined = left.joinAsof(right, {by: "by", on: "date", tolerance: "3d"});

  expect(joined).toFrameEqual(wanted);
});
});
6 changes: 3 additions & 3 deletions nodejs-polars/__tests__/expr.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ const df = () => {
return df.withColumns(

pl.col("date").cast(pl.Date),
pl.col("datetime").cast(pl.Datetime),
pl.col("datetime").cast(pl.Datetime("ms")),
pl.col("strings").cast(pl.Categorical)
.alias("cat")
);
Expand Down Expand Up @@ -1347,7 +1347,7 @@ describe("expr.str", () => {

const datetimeSeries = df.getColumn("timestamp")
.str
.strptime(pl.Datetime, "%FT%T%.3f%:z")
.strptime(pl.Datetime("ms"), "%FT%T%.3f%:z")
.rename("datetime");
const dateSeries = df.getColumn("timestamp")
.str
Expand All @@ -1359,7 +1359,7 @@ describe("expr.str", () => {
const actual = df.select(
col("timestamp")
.str
.strptime(pl.Datetime, "%FT%T%.3f%:z")
.strptime(pl.Datetime("ms"), "%FT%T%.3f%:z")
.as("datetime"),
col("timestamp")
.str
Expand Down
2 changes: 1 addition & 1 deletion nodejs-polars/__tests__/groupby.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ describe("groupby ops", () => {

const df = pl
.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]})
.withColumn(pl.col("dt").str.strptime(pl.Datetime));
.withColumn(pl.col("dt").str.strptime(pl.Datetime("ms")));

const a = pl.col("a");
const out = df.groupByRolling({indexColumn:"dt", period:"2d"}).agg(
Expand Down
8 changes: 4 additions & 4 deletions nodejs-polars/__tests__/io.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ describe("read:csv", () => {
2021-01-01 00:30:00,0.00298300,0.00300100
2021-01-01 00:45:00,0.00299400,0.00304000`;
const df = pl.readCSV(csv, {parseDates: true});
expect(df.dtypes).toEqual([pl.Datetime, pl.Float64, pl.Float64]);
expect(df.dtypes.map(dt => dt.toJSON())).toEqual([pl.Datetime("us").toJSON(), pl.Float64.toJSON(), pl.Float64.toJSON()]);
});
it.each`
csv | nullValues
Expand Down Expand Up @@ -142,7 +142,7 @@ describe("scan", () => {
nRows: 4
}).writeParquet(parquetpath);

const df = pl.readParquet(parquetpath);
const df = pl.scanParquet(parquetpath).collectSync();

expect(df.shape).toEqual({height: 4, width: 4});
});
Expand Down Expand Up @@ -174,7 +174,7 @@ describe("parquet", () => {
});

test("read:options", () => {
const df = pl.readParquet(parquetpath, {nRows: 4});
const df = pl.readParquet(parquetpath, {numRows: 4});
expect(df.shape).toEqual({height: 4, width: 4});
});

Expand All @@ -184,7 +184,7 @@ describe("parquet", () => {
});

test("scan:options", () => {
const df = pl.scanParquet(parquetpath, {nRows: 4}).collectSync();
const df = pl.scanParquet(parquetpath, {numRows: 4}).collectSync();
expect(df.shape).toEqual({height: 4, width: 4});
});
});
Expand Down
3 changes: 2 additions & 1 deletion nodejs-polars/__tests__/serde.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pl from "@polars";
import path from "path";
describe("serde", () => {
test("lazyframe:json", () => {
const df = pl.scanCSV("../examples/datasets/foods1.csv");
const df = pl.scanCSV(path.resolve("../examples/datasets/foods1.csv"));
const buf = df.serialize("json");
const deserde = pl.LazyDataFrame.deserialize(buf, "json");
const expected = df.collectSync();
Expand Down
2 changes: 1 addition & 1 deletion nodejs-polars/__tests__/series.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ describe("series", () => {
${[1n, 2n, 3n]} | ${pl.UInt64} | ${"bigint"}
${[true, false]} | ${pl.Bool} | ${"boolean"}
${[]} | ${pl.Float64} | ${"empty"}
${[new Date(Date.now())]} | ${pl.Datetime} | ${"Date"}
${[new Date(Date.now())]} | ${pl.Datetime("ms")} | ${"Date"}
`("defaults to $dtype for \"$type\"", ({ values, dtype}) => {
const name = chance.string();
const s = pl.Series(name, values);
Expand Down
8 changes: 4 additions & 4 deletions nodejs-polars/__tests__/setup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import pl from "@polars/index";
expect.extend({
toSeriesStrictEqual(actual, expected){
const seriesEq = actual.seriesEqual(expected);
const typesEq = actual.dtype === expected.dtype;
const typesEq = actual.dtype.equals(expected.dtype);
if(seriesEq && typesEq) {
return {
message: () => "series matches",
Expand Down Expand Up @@ -40,8 +40,8 @@ Received:
};
}
},
toFrameEqual(actual, expected) {
const pass = actual.frameEqual(expected);
toFrameEqual(actual, expected, nullEqual?) {
const pass = actual.frameEqual(expected, nullEqual);
if(pass) {
return {
message: () => "dataframes match",
Expand Down Expand Up @@ -121,7 +121,7 @@ export const df = () => {
return df.withColumns(

pl.col("date").cast(pl.Date),
pl.col("datetime").cast(pl.Datetime),
pl.col("datetime").cast(pl.Datetime("ms")),
pl.col("strings").cast(pl.Categorical)
.alias("cat")
);
Expand Down

0 comments on commit f94b29e

Please sign in to comment.