More work, mainly on typing-related aspects

One big priority was removing both the optional fields on AST nodes and the practice of tagging AST nodes with `global` properties (like `keepArray`, see below). I haven't changed the AST definitions at all. The `type` and `value` fields should have the same values they always had. However, in the types I'm using tagged unions for all the AST nodes. Generally, the `type` field is used to narrow the type. But in some cases, I further refine the type by `value` (which you can really think of as a subtype for the AST nodes). This allows me to narrow the type further in the code and do more static checking during compilation to make sure that the proper fields are being read and/or written. One "breaking change" is with the parser recovery tests. These aren't really breaking in the sense of impacting most users. They have to do with including some additional lexical information. This information was, generally speaking, already there. But there were cases where it wasn't included for some AST nodes. So the change is really just including it consistently. Another noticeable change is with error handling. Previously, the syntax errors were annotated on the root node of the AST. I've changed that so they don't impact the AST. I've also changed the error semantics very slightly. Now, if there are no errors, the `errors` field in an expression will return an empty array (previously, it returned `undefined`). One completely internal change was the elimination of the `keepArray` attribute on AST nodes. This attribute was added whenever `[]` was found in an expression. Previously, the parser walked through the AST to find the head of the path that contains the `[]` operator and then set the `keepArray` attribute on it. I've changed the handling so that the parser instead simply adds a special "decorator" AST node that is eventually optimized away.
This keeps the parser focused just on building the initial AST and leaves the semantics of the `SingletonArrayDecorator` (the node that gets inserted as a result of a `[]` in a path) to be handled completely during `ast_optimize`. It is important to note that this special AST node is always optimized away. In `ast_optimize`, each `case` of the `switch` statement generates a new AST node as the result of optimization. This is mainly to keep the typing clean (so we know precisely what we are working with at any given time).
jsonata-js · Jan 25, 2018 · f5420d9 · f5420d9
1 parent 33d5430
commit f5420d9
Show file tree

Hide file tree

Showing 16 changed files with 957 additions and 637 deletions.
diff --git a/__tests__/parser-recovery.ts b/__tests__/parser-recovery.ts
diff --git a/package.json b/package.json
@@ -52,15 +52,13 @@
     "request": "^2.81.0",
     "rollup": "^0.54.1",
     "ts-jest": "21.2.3",
-    "uglify-es": "^3.0.20"
+    "uglify-es": "^3.0.20",
+    "@types/node": "^9.3.0",
+    "typescript": "^2.6.2"
   },
   "engines": {
     "node": ">= 4"
   },
-  "dependencies": {
-    "@types/node": "^9.3.0",
-    "typescript": "^2.6.2"
-  },
   "jest": {
     "globals": {
       "ts-jest": {

diff --git a/src/ast.ts b/src/ast.ts
@@ -0,0 +1,206 @@
+import { Token } from "./tokenizer";
+import { Signature } from './signatures';
+
+// Potential AST changes
+//
+//   - Make predicate and group into AST nodes instead of optional fields on every node.
+//   - Change unary operator "[" to a different type...?
+//   - Get errors? off of BaseNode
+//   - Rationalize unary nodes
+
+export interface BaseNode {
+    type: string;
+    value: any;
+    position: number;
+    // This gets added to nodes to indicate how a value (assuming it is an object)
+    // should be grouped.
+    // TODO: Rename lhs...?
+    group?: { lhs: ASTNode[][], position: number };
+    // This gets added to nodes to specify a list of predicates to filter on.
+    predicate?: ASTNode[];
+}
+
+export interface WildcardNode extends BaseNode {
+    type: "wildcard";
+}
+
+export interface DescendantNode extends BaseNode {
+    type: "descendant";
+}
+
+export interface ErrorFields {
+    code: string;
+    position?: number;
+    token?: string;
+    stack?: string;
+}
+
+export interface ErrorNode extends BaseNode {
+    type: "error";
+    error: ErrorFields;
+    lhs: ASTNode;
+    remaining: Token[];
+}
+
+export interface VariableNode extends BaseNode {
+    type: "variable";
+}
+
+export interface NameNode extends BaseNode {
+    type: "name";
+}
+export interface LiteralNode extends BaseNode {
+    type: "literal";
+}
+
+export interface RegexNode extends BaseNode {
+    type: "regex";
+}
+
+export interface OperatorNode extends BaseNode {
+    type: "operator";
+}
+
+export interface EndNode extends BaseNode {
+    type: "end";
+    value: string;
+}
+
+export type TerminalNode = VariableNode | NameNode | LiteralNode | RegexNode | OperatorNode | EndNode;
+
+export interface UnaryMinusNode extends BaseNode {
+    type: "unary";
+    value: "-";
+    expression: ASTNode;
+}
+
+export interface ArrayConstructorNode extends BaseNode {
+    type: "unary";
+    value: "[";
+    expressions: ASTNode[];
+    consarray: boolean;
+}
+
+export interface UnaryObjectNode extends BaseNode {
+    type: "unary";
+    value: "{";
+    lhs: ASTNode[][];
+}
+
+export type UnaryNode = UnaryMinusNode | ArrayConstructorNode | UnaryObjectNode;
+
+export interface BinaryOperationNode extends BaseNode {
+    type: "binary";
+    value: "+" | "-" | "*" | "/" | "[" | ".." | "." | "[" | ":=" | "~>"; // TODO: There must be more?!? (e.g., comparisons)
+    lhs: ASTNode;
+    rhs: ASTNode; // ASTNode if operator is "." | "[" | ":=" | "~>"
+}
+
+export interface BinaryObjectNode extends BaseNode {
+    type: "binary";
+    value: "{";
+    lhs: ASTNode;
+    rhs: ASTNode[]; // ASTNode[] if operator is "{"
+}
+
+export type BinaryNode = BinaryOperationNode | BinaryObjectNode;
+
+export interface SortTerm {
+    descending: boolean;
+    expression: ASTNode;
+}
+
+export interface SortNode extends BaseNode {
+    type: "sort";
+    lhs: ASTNode;
+    rhs: SortTerm[];
+}
+
+export interface TernaryNode extends BaseNode {
+    type: "condition";
+    condition: ASTNode;
+    then: ASTNode;
+    else: ASTNode;
+    position: number;
+}
+
+export interface BlockNode extends BaseNode {
+    type: "block";
+    expressions: ASTNode[];
+}
+
+export interface TransformNode extends BaseNode {
+    type: "transform";
+    pattern: ASTNode;
+    update: ASTNode;
+    delete?: ASTNode;
+}
+
+export interface FunctionInvocationNode extends BaseNode {
+    type: "function" | "partial";
+    procedure: ASTNode;
+    arguments: ASTNode[];
+    // This is added when creating PathNodes.
+    nextFunction?: Function;
+}
+
+export interface LambdaDefinitionNode extends BaseNode {
+    type: "lambda";
+    body: ASTNode;
+    signature: Signature;
+    arguments: ASTNode[];
+    thunk: boolean;
+}
+
+export interface SingletonArrayDecorator extends BaseNode {
+    type: "singleton";
+    next: ASTNode;
+}
+
+// This type of node only appears after the AST is optimized
+export interface PathNode extends BaseNode {
+    type: "path";
+    steps: ASTNode[];
+    keepSingletonArray: boolean,
+}
+
+export interface BindNode extends BaseNode {
+    type: "bind";
+    lhs: ASTNode;
+    rhs: ASTNode;
+}
+
+export interface ApplyNode extends BaseNode {
+    type: "apply";
+    lhs: ASTNode;
+    rhs: ASTNode;
+}
+
+/**
+ * These are the AST nodes that come directly out of the parser before
+ * ast_optimize is called.
+ */
+export type ASTNode =
+    | WildcardNode
+    | DescendantNode
+    | ErrorNode
+    | LiteralNode
+    | NameNode
+    | VariableNode
+    | RegexNode
+    | OperatorNode
+    | UnaryNode
+    | BinaryNode
+    | BinaryObjectNode
+    | SortNode
+    | TernaryNode
+    | BlockNode
+    | TransformNode
+    | FunctionInvocationNode
+    | LambdaDefinitionNode
+    | PathNode
+    | BindNode
+    | ApplyNode
+    | EndNode
+    | SingletonArrayDecorator;
+
diff --git a/src/evaluate.ts b/src/evaluate.ts
@@ -11,8 +11,9 @@ import {
     toSequence,
 } from "./utils";
 import { defineFunction } from "./signatures";
-import { parser } from "./parser";
+import { parser } from './parser';
 import { functionBoolean, functionAppend, functionString, functionSort, createStandardFrame } from "./functions";
+// import * as ast from "./ast";
 
 /**
  * Evaluate expression against input data

diff --git a/src/jsonata.ts b/src/jsonata.ts
@@ -3,6 +3,7 @@ import { lookupMessage, createFrame } from './utils';
 import { evaluate } from './evaluate';
 import { defineFunction } from './signatures';
 import { createStandardFrame } from './functions';
+import { ASTNode } from './ast';
 
 export interface Options {
     recover: boolean;
@@ -27,12 +28,10 @@ export type AST = any;
  * @returns {{evaluate: evaluate, assign: assign}} Evaluated expression
  */
 export function jsonata(expr: string, options?: Partial<Options>): Expression {
-    var ast;
-    var errors;
+    let ast: undefined | ASTNode = undefined;
+    let errors: string[] = [];
     try {
-        ast = parser(expr, options && options.recover);
-        errors = ast.errors;
-        delete ast.errors;
+        ast = parser(expr, errors, options && options.recover);
     } catch (err) {
         // insert error message into structure
         err.message = lookupMessage(err);
@@ -59,7 +58,7 @@ export function jsonata(expr: string, options?: Partial<Options>): Expression {
     return {
         evaluate: function(input, bindings, callback) {
             // throw if the expression compiled with syntax errors
-            if (typeof errors !== "undefined") {
+            if (typeof errors !== "undefined" && errors.length>0) {
                 var err: any = {
                     code: "S0500",
                     position: 0,