Split rules into their own module.

Mostly for ease of navigation
mozilla · Jun 20, 2017 · 32b8ad4 · 32b8ad4
1 parent 2e232da
commit 32b8ad4
Show file tree

Hide file tree

Showing 3 changed files with 314 additions and 304 deletions.
diff --git a/index.js b/index.js
@@ -2,7 +2,8 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-const {rule, ruleset} = require('./ruleset');
+const {rule} = require('./rule');
+const {ruleset} = require('./ruleset');
 const {dom} = require('./lhs');
 const {out} = require('./rhs');
 const {and, atMost, conserveScore, max, note, props, score, type, typeIn} = require('./side');

diff --git a/rule.js b/rule.js
@@ -0,0 +1,309 @@
+const {forEach, map} = require('wu');
+
+const {NiceSet, setDefault} = require('./utils');
+const {OutwardRhs} = require('./rhs');
+
+
+/**
+ * Construct and return the proper type of rule class based on the
+ * inwardness/outwardness of the RHS.
+ */
+function rule(lhs, rhs) {
+    // Since out() is a valid call only on the RHS (unlike type()), we can take
+    // a shortcut here: any outward RHS will already be an OutwardRhs; we don't
+    // need to sidetrack it through being a Side. And OutwardRhs has an asRhs()
+    // that just returns itself.
+    return new ((rhs instanceof OutwardRhs) ? OutwardRule : InwardRule)(lhs, rhs);
+}
+
+/**
+ * We place the in/out distinction in Rules because it determines whether the
+ * RHS result is cached, and Rules are responsible for maintaining the rulewise
+ * cache ruleset.ruleCache.
+ */
+class Rule {  // abstract
+    constructor(lhs, rhs) {
+        this.lhs = lhs.asLhs();
+        this.rhs = rhs.asRhs();
+    }
+
+    /**
+     * Return a NiceSet of the rules that this one shallowly depends on in the
+     * given ruleset. In a BoundRuleset, this may include rules that have
+     * already been executed.
+     *
+     * Depend on emitters of any LHS type this rule finalizes. (See
+     * _typesFinalized for a definition.) Depend on adders of any other LHS
+     * types (because, after all, we need to know what nodes have that type in
+     * order to find the set of LHS nodes). This works for simple rules and
+     * complex ones like and().
+     *
+     * Specific examples (where A is a type):
+     * * A.max->* depends on anything emitting A.
+     * * Even A.max->A depends on A emitters, because we have to have all the
+     *   scores factored in first. For example, what if we did
+     *   max(A)->score(.5)?
+     * * A->A depends on anything adding A.
+     * * A->(something other than A) depends on anything emitting A. (For
+     *   example, we need the A score finalized before we could transfer it to
+     *   B using conserveScore().)
+     * * A->out() also depends on anything emitting A. Fnode methods aren't
+     *   smart enough to lazily run emitter rules as needed. We could make them
+     *   so if it was shown to be an advantage.
+     */
+    prerequisites(ruleset) {
+        // Optimization: we could cache the result of this when in a compiled (immutable) ruleset.
+
+        // Extend prereqs with rules derived from each of the give types. If no
+        // rules are found, raise an exception, as that indicates a malformed
+        // ruleset.
+        function extendOrThrow(prereqs, types, ruleGetter, verb) {
+            for (let type of types) {
+                const rules = ruleGetter(type);
+                if (rules.length > 0) {
+                    prereqs.extend(rules);
+                } else {
+                    throw new Error(`No rule ${verb} the "${type}" type, but another rule needs it as input.`);
+                }
+            }
+        }
+
+        const prereqs = new NiceSet();
+
+        // Add finalized types:
+        extendOrThrow(prereqs, this._typesFinalized(), type => ruleset.inwardRulesThatCouldEmit(type), 'emits');
+
+        // Add mentioned types:
+        // We could say this.lhs.typesMentioned().minus(typesFinalized) as an
+        // optimization. But since types mentioned are a superset of types
+        // finalized and rules adding are a subset of rules emitting, we get
+        // the same result without.
+        extendOrThrow(prereqs, this.lhs.typesMentioned(), type => ruleset.inwardRulesThatCouldAdd(type), 'adds');
+
+        return prereqs;
+    }
+
+    /**
+     * Return the types that this rule finalizes.
+     *
+     * To "finalize" a type means to make sure we're finished running all
+     * possible rules that might change a node's score or notes w.r.t. a given
+     * type. This is generally done because we're about to use those data for
+     * something, like computing a new type's score or or an aggregate
+     * function. Exhaustively, we're about to...
+     * * change the type of the nodes or
+     * * aggregate all nodes of a type
+     *
+     * This base-class implementation just returns what aggregate functions
+     * need, since that need spans inward and outward rules.
+     *
+     * @return Set of types
+     */
+    _typesFinalized() {
+        // Get the types that are fed to aggregate functions. Aggregate
+        // functions are more demanding than a simple type() LHS. A type() LHS
+        // itself does not finalize its nodes because the things it could do to
+        // them without changing their type (adding notes, multiplying score)
+        // are immutable or commutative (respectively). Thus, we require a RHS
+        // type change in order to require finalization of a simple type()
+        // mention. A max(B), OTOH, is not commutative with other B->B rules
+        // (imagine type(B).max()->score(.5)), so it must depend on B emitters
+        // and thus finalize B. (This will have to be relaxed or rethought when
+        // we do the max()/atMost() optimization. Perhaps we can delegate to
+        // aggregate functions up in Rule.prerequisites() to ask what their
+        // prereqs are. If they implement such an optimization, they can reply.
+        // Otherwise, we can assume they are all the nodes of their type.)
+        //
+        // TODO: Could arbitrary predicates (once we implement those) matter
+        // too? Maybe it's not just aggregations.
+        const type = this.lhs.aggregatedType();
+        return (type === undefined) ? new NiceSet() : new NiceSet([type]);
+    }
+}
+
+/**
+ * A normal rule, whose results head back into the Fathom knowledgebase, to be
+ * operated on by further rules.
+ */
+class InwardRule extends Rule {
+    // TODO: On construct, complain about useless rules, like a dom() rule that
+    // doesn't assign a type.
+
+    /**
+     * Return an iterable of the fnodes emitted by the RHS of this rule.
+     * Side effect: update ruleset's store of fnodes, its accounting of which
+     * rules are done executing, and its cache of results per type.
+     */
+    results(ruleset) {
+        if (ruleset.doneRules.has(this)) {  // shouldn't happen
+            throw new Error('A bug in Fathom caused results() to be called on an inward rule twice. That could cause redundant score multiplications, etc.');
+        }
+        const self = this;
+        // For now, we consider most of what a LHS computes to be cheap, aside
+        // from type() and type().max(), which are cached by their specialized
+        // LHS subclasses.
+        const leftFnodes = this.lhs.fnodes(ruleset);
+        // Avoid returning a single fnode more than once. LHSs uniquify
+        // themselves, but the RHS can change the element it's talking
+        // about and thus end up with dupes.
+        const returnedFnodes = new Set();
+
+        // Merge facts into fnodes:
+        forEach(
+            function updateFnode(leftFnode) {
+                const leftType = self.lhs.guaranteedType();
+                const fact = self.rhs.fact(leftFnode, leftType);
+                self.lhs.checkFact(fact);
+                const rightFnode = ruleset.fnodeForElement(fact.element || leftFnode.element);
+                // If the RHS doesn't specify a type, default to the
+                // type of the LHS, if any:
+                const rightType = fact.type || self.lhs.guaranteedType();
+                if (fact.conserveScore) {
+                    // If conserving, multiply in the input-type score
+                    // from the LHS node. (Never fall back to
+                    // multiplying in the RHS-type score from the LHS:
+                    // it's not guaranteed to be there, and even if it
+                    // will ever be, the executor doesn't guarantee it
+                    // has been filled in yet.)
+                    if (leftType !== undefined) {
+                        rightFnode.conserveScoreFrom(leftFnode, leftType, rightType);
+                    } else {
+                        throw new Error('conserveScore() was called in a rule whose left-hand side is a dom() selector and thus has no predictable type.');
+                    }
+                }
+                if (fact.score !== undefined) {
+                    if (rightType !== undefined) {
+                        rightFnode.multiplyScoreFor(rightType, fact.score);
+                    } else {
+                        throw new Error(`The right-hand side of a rule specified a score (${fact.score}) with neither an explicit type nor one we could infer from the left-hand side.`);
+                    }
+                }
+                if (fact.type !== undefined || fact.note !== undefined) {
+                    // There's a reason to call setNoteFor.
+                    if (rightType === undefined) {
+                        throw new Error(`The right-hand side of a rule specified a note (${fact.note}) with neither an explicit type nor one we could infer from the left-hand side. Notes are per-type, per-node, so that's a problem.`);
+                    } else {
+                        rightFnode.setNoteFor(rightType, fact.note);
+                    }
+                }
+                returnedFnodes.add(rightFnode);
+            },
+            leftFnodes);
+
+        // Update ruleset lookup tables.
+        // First, mark this rule as done:
+        ruleset.doneRules.add(this);
+        // Then, stick each fnode in typeCache under all applicable types.
+        // Optimization: we really only need to loop over the types
+        // this rule can possibly add.
+        for (let fnode of returnedFnodes) {
+            for (let type of fnode.typesSoFar()) {
+                setDefault(ruleset.typeCache, type, () => new Set()).add(fnode);
+            }
+        }
+        return returnedFnodes.values();
+    }
+
+    /**
+     * Return a Set of the types that could be emitted back into the system.
+     * To emit a type means to either to add it to a fnode emitted from the RHS
+     * or to leave it on such a fnode where it already exists.
+     */
+    typesItCouldEmit() {
+        const rhs = this.rhs.possibleEmissions();
+        if (!rhs.couldChangeType && this.lhs.guaranteedType() !== undefined) {
+            // It's a b -> b rule.
+            return new Set([this.lhs.guaranteedType()]);
+        } else if (rhs.possibleTypes.size > 0) {
+            // We can prove the type emission from the RHS alone.
+            return rhs.possibleTypes;
+        } else {
+            throw new Error('Could not determine the emitted type of a rule because its right-hand side calls props() without calling typeIn().');
+        }
+    }
+
+    /**
+     * Return a Set of types I could add to fnodes I output (where the fnodes
+     * did not already have them).
+     */
+    typesItCouldAdd() {
+        const ret = new Set(this.typesItCouldEmit());
+        ret.delete(this.lhs.guaranteedType());
+        return ret;
+    }
+
+    /**
+     * Add the types we could change to the superclass's result.
+     */
+    _typesFinalized() {
+        const self = this;
+        function typesThatCouldChange() {
+            const ret = new NiceSet();
+
+            // Get types that could change:
+            const emissions = self.rhs.possibleEmissions();
+            if (emissions.couldChangeType) {
+                // Get the possible guaranteed combinations of types on the LHS
+                // (taking just this LHS into account). For each combo, if the RHS
+                // adds a type that's not in the combo, the types in the combo get
+                // unioned into ret.
+                for (let combo of self.lhs.possibleTypeCombinations()) {
+                    for (let rhsType of emissions.possibleTypes) {
+                        if (!combo.has(rhsType)) {
+                            ret.extend(combo);
+                            break;
+                        }
+                    }
+                }
+            }
+            // Optimization: the possible combos could be later expanded to be
+            // informed by earlier rules which add the types mentioned in the LHS.
+            // If the only way for something to get B is to have Q first, then we
+            // can add Q to each combo and end up with fewer types finalized. Would
+            // this imply the existence of a Q->B->Q cycle and thus be impossible?
+            // Think about it. If we do this, we can centralize that logic here,
+            // rather than repeating it in all the Lhs subclasses).
+            return ret;
+        }
+
+        return typesThatCouldChange().extend(super._typesFinalized());
+    }
+}
+
+/**
+ * A rule whose RHS is an out(). This represents a final goal of a ruleset.
+ * Its results go out into the world, not inward back into the Fathom
+ * knowledgebase.
+ */
+class OutwardRule extends Rule {
+    /**
+     * Compute the whole thing, including any .through() and .allThrough().
+     * Do not mark me done in ruleset.doneRules; out rules are never marked as
+     * done so they can be requested many times without having to cache their
+     * (potentially big, since they aren't necessarily fnodes?) results. (We
+     * can add caching later if it proves beneficial.)
+     */
+    results(ruleset) {
+        return this.rhs.allCallback(map(this.rhs.callback, this.lhs.fnodes(ruleset)));
+    }
+
+    /**
+     * @return the key under which the output of this rule will be available
+     */
+    key() {
+        return this.rhs.key;
+    }
+
+    /**
+     * OutwardRules finalize all types mentioned.
+     */
+    _typesFinalized() {
+        return this.lhs.typesMentioned().extend(super._typesFinalized());
+    }
+}
+
+module.exports = {
+    InwardRule,
+    OutwardRule,
+    rule
+};