From 95e2e1a638977e0735625d55be2c25d296050847 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Sun, 5 Sep 2021 10:07:28 -0700 Subject: [PATCH 1/9] =?UTF-8?q?coerce=20to=20the=20scale=E2=80=99s=20type?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/scales.js | 61 ++++++++++++++++++++++++++++- test/output/aaplCloseUntyped.svg | 67 ++++++++++++++++++++++++++++++++ test/plots/aapl-close-untyped.js | 19 +++++++++ test/plots/index.js | 1 + 4 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 test/output/aaplCloseUntyped.svg create mode 100644 test/plots/aapl-close-untyped.js diff --git a/src/scales.js b/src/scales.js index dec9731463..14a7ed06fc 100644 --- a/src/scales.js +++ b/src/scales.js @@ -58,7 +58,37 @@ function autoScaleRound(scale) { } function Scale(key, channels = [], options = {}) { - switch (inferScaleType(key, channels, options)) { + const type = inferScaleType(key, channels, options); + + // Once the scale type is known, coerce the associated channel values and any + // explicitly-specified domain to the expected type. + switch (type) { + case "diverging": + case "diverging-sqrt": + case "diverging-pow": + case "diverging-log": + case "diverging-symlog": + case "cyclical": + case "sequential": + case "linear": + case "sqrt": + case "threshold": + case "quantile": + case "pow": + case "log": + case "symlog": + options = coerceType(channels, options, coerceNumber, Float64Array); + break; + case "identity": + if (registry.get(key) === position) options = coerceType(channels, options, coerceNumber, Float64Array); + break; + case "utc": + case "time": + options = coerceType(channels, options, coerceDate); + break; + } + + switch (type) { case "diverging": return ScaleDiverging(key, channels, options); case "diverging-sqrt": return ScaleDivergingSqrt(key, channels, options); case "diverging-pow": return ScaleDivergingPow(key, channels, options); @@ -144,3 +174,32 @@ export function isCollapsed(scale) { } return true; } + +// Mutates channel.value! +function coerceType(channels, options, coerce, type) { + for (const c of channels) c.value = coerceArray(c.value, coerce, type); + return {...options, domain: coerceArray(options.domain, coerce, type)}; +} + +function coerceArray(array, coerce, type = Array) { + if (array !== undefined) return type.from(array, coerce); +} + +// Unlike Mark’s number, here we want to convert null and undefined to NaN, +// since the result will be stored in a Float64Array and we don’t want null to +// be coerced to zero. +function coerceNumber(x) { + return x == null ? NaN : +x; +} + +// When coercing strings to dates, we only want to allow the ISO 8601 format +// since the built-in string parsing of the Date constructor varies across +// browsers. (In the future, this could be made more liberal if desired, though +// it is still generally preferable to do date parsing yourself explicitly, +// rather than rely on Plot.) Any non-string values are coerced to number first +// and treated as milliseconds since UNIX epoch. +function coerceDate(x) { + return x instanceof Date ? x + : typeof x === "string" && /^([-+]\d{2})?\d{4}(-\d{2}(-\d{2})?)?(T\d{2}:\d{2}(:\d{2}(\.\d{3})?)?(Z|[-+]\d{2}:\d{2})?)?$/.test(x) ? new Date(x) + : new Date(x == null ? NaN : +x); +} diff --git a/test/output/aaplCloseUntyped.svg b/test/output/aaplCloseUntyped.svg new file mode 100644 index 0000000000..3800f7e59e --- /dev/null +++ b/test/output/aaplCloseUntyped.svg @@ -0,0 +1,67 @@ + + + + + 0 + + + + 20 + + + + 40 + + + + 60 + + + + 80 + + + + 100 + + + + 120 + + + + 140 + + + + 160 + + + + 180 + ↑ Close + + + + 2014 + + + 2015 + + + 2016 + + + 2017 + + + 2018 + + + + + + + + + \ No newline at end of file diff --git a/test/plots/aapl-close-untyped.js b/test/plots/aapl-close-untyped.js new file mode 100644 index 0000000000..ee0fb270b0 --- /dev/null +++ b/test/plots/aapl-close-untyped.js @@ -0,0 +1,19 @@ +import * as Plot from "@observablehq/plot"; +import * as d3 from "d3"; + +export default async function() { + const AAPL = await d3.csv("data/aapl.csv"); + return Plot.plot({ + x: { + type: "utc" + }, + y: { + type: "linear", + grid: true + }, + marks: [ + Plot.line(AAPL, {x: "Date", y: "Close"}), + Plot.ruleY([0]) + ] + }); +} diff --git a/test/plots/index.js b/test/plots/index.js index a74e855f11..b82ed6e311 100644 --- a/test/plots/index.js +++ b/test/plots/index.js @@ -1,6 +1,7 @@ export {default as aaplCandlestick} from "./aapl-candlestick.js"; export {default as aaplChangeVolume} from "./aapl-change-volume.js"; export {default as aaplClose} from "./aapl-close.js"; +export {default as aaplCloseUntyped} from "./aapl-close-untyped.js"; export {default as aaplMonthly} from "./aapl-monthly.js"; export {default as aaplVolume} from "./aapl-volume.js"; export {default as anscombeQuartet} from "./anscombe-quartet.js"; From dfde475ff5244f4c00691a93ab9d8311dd82c3b7 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Mon, 6 Sep 2021 13:11:48 -0700 Subject: [PATCH 2/9] upgrade isoformat --- README.md | 2 +- package.json | 2 +- src/format.js | 6 +++++- src/scales.js | 3 ++- yarn.lock | 8 ++++---- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 1ef9354a64..40ef1d0721 100644 --- a/README.md +++ b/README.md @@ -1532,7 +1532,7 @@ These helper functions are provided for use as a *scale*.tickFormat [axis option Plot.formatIsoDate(new Date("2020-01-01T00:00.000Z")) // "2020-01-01" ``` -Given a *date*, returns the shortest equivalent ISO 8601 UTC string. +Given a *date*, returns the shortest equivalent ISO 8601 UTC string. If the given *date* is not valid, returns `"Invalid Date"`. #### Plot.formatWeekday(*locale*, *format*) diff --git a/package.json b/package.json index b70b4f0c3e..8f1e1af76c 100644 --- a/package.json +++ b/package.json @@ -51,7 +51,7 @@ }, "dependencies": { "d3": "^7.0.0", - "isoformat": "^0.1.0" + "isoformat": "^0.2.0" }, "engines": { "node": ">=12" diff --git a/src/format.js b/src/format.js index 2b7334eab4..737415bbf0 100644 --- a/src/format.js +++ b/src/format.js @@ -1,4 +1,4 @@ -export {default as formatIsoDate} from "isoformat"; +import {format as isoFormat} from "isoformat"; export function formatMonth(locale = "en-US", month = "short") { const format = new Intl.DateTimeFormat(locale, {timeZone: "UTC", month}); @@ -17,3 +17,7 @@ export function formatWeekday(locale = "en-US", weekday = "short") { } }; } + +export function formatIsoDate(date) { + return isoFormat(date, "Invalid Date"); +} diff --git a/src/scales.js b/src/scales.js index 14a7ed06fc..c85cfc87b5 100644 --- a/src/scales.js +++ b/src/scales.js @@ -4,6 +4,7 @@ import {ScaleDiverging, ScaleDivergingSqrt, ScaleDivergingPow, ScaleDivergingLog import {ScaleTime, ScaleUtc} from "./scales/temporal.js"; import {ScaleOrdinal, ScalePoint, ScaleBand} from "./scales/ordinal.js"; import {isOrdinal, isTemporal} from "./mark.js"; +import {parse as isoParse} from "isoformat"; export function Scales(channels, {inset, round, nice, align, padding, ...options} = {}) { const scales = {}; @@ -200,6 +201,6 @@ function coerceNumber(x) { // and treated as milliseconds since UNIX epoch. function coerceDate(x) { return x instanceof Date ? x - : typeof x === "string" && /^([-+]\d{2})?\d{4}(-\d{2}(-\d{2})?)?(T\d{2}:\d{2}(:\d{2}(\.\d{3})?)?(Z|[-+]\d{2}:\d{2})?)?$/.test(x) ? new Date(x) + : typeof x === "string" ? isoParse(x) : new Date(x == null ? NaN : +x); } diff --git a/yarn.lock b/yarn.lock index b2e1250dad..f511225f26 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2243,10 +2243,10 @@ isexe@^2.0.0: resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" integrity sha1-6PvzdNxVb/iUehDcsFctYz8s+hA= -isoformat@^0.1.0: - version "0.1.0" - resolved "https://registry.yarnpkg.com/isoformat/-/isoformat-0.1.0.tgz#b693c1c9ee9ab02f1af5af41ceeae52bf501b233" - integrity sha512-4wCSk50Ov1PKbZ2m+YN0rUgQfF4NRkIavbhpW1mANEqD9HxBZ+j/fWk8hERq1yxn+CfWqvOac4m9axLuF0NfEw== +isoformat@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/isoformat/-/isoformat-0.2.0.tgz#52c3dce6c281adb6cb7f060895a731b7b2d52c1b" + integrity sha512-iyxQ94xMvUZryoHVaXg/TSLM318/aO7xS7Ute+t4MkvZ17IDfe9MkI/MQuu7XgxbmTiGkeggNj+1f6wmxF876Q== isstream@~0.1.2: version "0.1.2" From 1b3428ca67c662845411c5da8e9cbae3250cad2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 6 Sep 2021 22:59:15 +0200 Subject: [PATCH 3/9] document type coercion --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 40ef1d0721..0b278ed9dc 100644 --- a/README.md +++ b/README.md @@ -165,6 +165,8 @@ For ordinal data (*e.g.*, strings), use the *ordinal* scale type or the *point* You can opt-out of a scale using the *identity* scale type. This is useful if you wish to specify literal colors or pixel positions within a mark channel rather than relying on the scale to convert abstract values into visual values. For position scales (*x* and *y*), an *identity* scale is still quantitative and may produce an axis, yet unlike a *linear* scale the domain and range are fixed based on the plot layout. +Quantitative scales, as well as *identity* for position scales, will coerce string values to numbers; time scales will likewise coerce string values to a date using [isoformat.parse](https://github.com/mbostock/isoformat/blob/main/README.md#parsedate-fallback), provided they are formatted as ISO 8601. + A scale’s domain (the extent of its inputs, abstract values) and range (the extent of its outputs, visual values) are typically inferred automatically. You can set them explicitly using these options: * *scale*.**domain** - typically [*min*, *max*], or an array of ordinal or categorical values From 25ff424119d7900d79697c527aae9d7d0291d4aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 6 Sep 2021 23:03:20 +0200 Subject: [PATCH 4/9] language --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0b278ed9dc..dca2f8b39b 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ For ordinal data (*e.g.*, strings), use the *ordinal* scale type or the *point* You can opt-out of a scale using the *identity* scale type. This is useful if you wish to specify literal colors or pixel positions within a mark channel rather than relying on the scale to convert abstract values into visual values. For position scales (*x* and *y*), an *identity* scale is still quantitative and may produce an axis, yet unlike a *linear* scale the domain and range are fixed based on the plot layout. -Quantitative scales, as well as *identity* for position scales, will coerce string values to numbers; time scales will likewise coerce string values to a date using [isoformat.parse](https://github.com/mbostock/isoformat/blob/main/README.md#parsedate-fallback), provided they are formatted as ISO 8601. +Quantitative scales, as well as *identity* for position scales, will coerce string values to numbers; time scales will likewise coerce string values to dates using [isoformat.parse](https://github.com/mbostock/isoformat/blob/main/README.md#parsedate-fallback), provided they are formatted as ISO 8601. A scale’s domain (the extent of its inputs, abstract values) and range (the extent of its outputs, visual values) are typically inferred automatically. You can set them explicitly using these options: From e626354254ad4b95937dd23f1791d6a935ddfa24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 6 Sep 2021 23:07:39 +0200 Subject: [PATCH 5/9] changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4c456013e..bbd018a051 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Observable Plot - Changelog +## Forthcoming + +*Not yet released.* These notes are a work in progress. + +### Scales + +Quantitative scales, as well as *identity* for position scales, will now coerce string values to numbers; time scales will likewise coerce string values to dates using [isoformat.parse](https://github.com/mbostock/isoformat/blob/main/README.md#parsedate-fallback), provided they are formatted as ISO 8601. + ## 0.2.0 Released August 20, 2021. From 24d0897728dd0fe5f18769ddb7ff224752ea3645 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Mon, 6 Sep 2021 18:49:08 -0700 Subject: [PATCH 6/9] update CHANGELOG --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbd018a051..a931e6532f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,12 @@ # Observable Plot - Changelog -## Forthcoming +## 0.3.0 *Not yet released.* These notes are a work in progress. ### Scales -Quantitative scales, as well as *identity* for position scales, will now coerce string values to numbers; time scales will likewise coerce string values to dates using [isoformat.parse](https://github.com/mbostock/isoformat/blob/main/README.md#parsedate-fallback), provided they are formatted as ISO 8601. +Quantitative scales, as well as identity position scales, now coerce channel values to numbers; both null and undefined are coerced to NaN. Similarly, time scales coerce channel values to dates; numbers are assumed to be milliseconds since UNIX epoch, while strings are assumed to be in [ISO 8601 format](https://github.com/mbostock/isoformat/blob/main/README.md#parsedate-fallback). ## 0.2.0 From 11c32289348aa2f97417b55437b4e192385f59a8 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Mon, 6 Sep 2021 18:54:54 -0700 Subject: [PATCH 7/9] coerce invalid dates to undefined --- src/scales.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/scales.js b/src/scales.js index c85cfc87b5..ba75e184a6 100644 --- a/src/scales.js +++ b/src/scales.js @@ -200,7 +200,8 @@ function coerceNumber(x) { // rather than rely on Plot.) Any non-string values are coerced to number first // and treated as milliseconds since UNIX epoch. function coerceDate(x) { - return x instanceof Date ? x + return x instanceof Date && !isNaN(x) ? x : typeof x === "string" ? isoParse(x) - : new Date(x == null ? NaN : +x); + : x == null || isNaN(x = +x) ? undefined + : new Date(x); } From 916d04bbef6d684f0dcf53cd954d6a3165b269bd Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Mon, 6 Sep 2021 19:09:42 -0700 Subject: [PATCH 8/9] update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dca2f8b39b..98a432e1f4 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ For ordinal data (*e.g.*, strings), use the *ordinal* scale type or the *point* You can opt-out of a scale using the *identity* scale type. This is useful if you wish to specify literal colors or pixel positions within a mark channel rather than relying on the scale to convert abstract values into visual values. For position scales (*x* and *y*), an *identity* scale is still quantitative and may produce an axis, yet unlike a *linear* scale the domain and range are fixed based on the plot layout. -Quantitative scales, as well as *identity* for position scales, will coerce string values to numbers; time scales will likewise coerce string values to dates using [isoformat.parse](https://github.com/mbostock/isoformat/blob/main/README.md#parsedate-fallback), provided they are formatted as ISO 8601. +Quantitative scales, as well as identity position scales, now coerce channel values to numbers; both null and undefined are coerced to NaN. Similarly, time scales coerce channel values to dates; numbers are assumed to be milliseconds since UNIX epoch, while strings are assumed to be in [ISO 8601 format](https://github.com/mbostock/isoformat/blob/main/README.md#parsedate-fallback). A scale’s domain (the extent of its inputs, abstract values) and range (the extent of its outputs, visual values) are typically inferred automatically. You can set them explicitly using these options: From 9d7cde37d456661b0c13527a31f68a62ca259e44 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Mon, 6 Sep 2021 19:22:18 -0700 Subject: [PATCH 9/9] update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 98a432e1f4..710a602f3d 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ For ordinal data (*e.g.*, strings), use the *ordinal* scale type or the *point* You can opt-out of a scale using the *identity* scale type. This is useful if you wish to specify literal colors or pixel positions within a mark channel rather than relying on the scale to convert abstract values into visual values. For position scales (*x* and *y*), an *identity* scale is still quantitative and may produce an axis, yet unlike a *linear* scale the domain and range are fixed based on the plot layout. -Quantitative scales, as well as identity position scales, now coerce channel values to numbers; both null and undefined are coerced to NaN. Similarly, time scales coerce channel values to dates; numbers are assumed to be milliseconds since UNIX epoch, while strings are assumed to be in [ISO 8601 format](https://github.com/mbostock/isoformat/blob/main/README.md#parsedate-fallback). +Quantitative scales, as well as identity position scales, coerce channel values to numbers; both null and undefined are coerced to NaN. Similarly, time scales coerce channel values to dates; numbers are assumed to be milliseconds since UNIX epoch, while strings are assumed to be in [ISO 8601 format](https://github.com/mbostock/isoformat/blob/main/README.md#parsedate-fallback). A scale’s domain (the extent of its inputs, abstract values) and range (the extent of its outputs, visual values) are typically inferred automatically. You can set them explicitly using these options: