Skip to content

Commit

Permalink
[MLv2] Introduce swap-clauses to reorder aggregations, etc. (#39850)
Browse files Browse the repository at this point in the history
This includes some generative testing, since that felt like
a great way to try all kinds of permutations of swapping clauses.

This also patches `metabase.types/assignable?` to memoize it.
Deeply nested `assignable?` calls made 100 tests of swapping
filters take 1600ms rather than the 50-60ms of the other clauses.

I think this is safe and reasonable on memory to cache forever,
but speak up if you're concerned. There's perhaps O(60) type
keywords, so `type X type -> Boolean` is a small space impact.

Fixes #39215.
  • Loading branch information
bshepherdson committed Mar 11, 2024
1 parent 7a5912e commit 796256c
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 5 deletions.
3 changes: 3 additions & 0 deletions src/metabase/lib/core.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
[metabase.lib.remove-replace :as lib.remove-replace]
[metabase.lib.segment :as lib.segment]
[metabase.lib.stage :as lib.stage]
[metabase.lib.swap :as lib.swap]
[metabase.lib.table :as lib.table]
[metabase.lib.temporal-bucket :as lib.temporal-bucket]
[metabase.lib.util :as lib.util]
Expand Down Expand Up @@ -292,6 +293,8 @@
drop-stage
drop-empty-stages
has-clauses?]
[lib.swap
swap-clauses]
[lib.temporal-bucket
describe-temporal-unit
describe-temporal-interval
Expand Down
11 changes: 11 additions & 0 deletions src/metabase/lib/js.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,17 @@
(lib.core/normalize (js->clj target-clause :keywordize-keys true))
(lib.core/normalize (js->clj new-clause :keywordize-keys true))))

(defn ^:export swap-clauses
"Exchanges the positions of two clauses in a list. Can be used for filters, aggregations, breakouts, and expressions.
Returns the updated query. If it can't find both clauses in a single list (therefore also in the same stage), emits a
warning and returns the query unchanged."
[a-query stage-number source-clause target-clause]
(lib.core/swap-clauses
a-query stage-number
(lib.core/normalize (js->clj source-clause :keywordize-keys true))
(lib.core/normalize (js->clj target-clause :keywordize-keys true))))

(defn- prep-query-for-equals [a-query field-ids]
(-> a-query
mbql.js/normalize-cljs
Expand Down
44 changes: 44 additions & 0 deletions src/metabase/lib/swap.cljc
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
(ns metabase.lib.swap
(:require
[metabase.lib.options :as lib.options]
[metabase.lib.util :as lib.util]
[metabase.util.log :as log]))

(defn- swap-failure-no-match [stage target-clause]
(log/warn "No matching clause in swap-clauses" target-clause stage))

(defn- swap-failure-ambiguous [target-clause matches]
(log/warn "Ambiguous match for clause in swap-clauses" target-clause matches))

(defn- uuid-match [stage target-clause]
(let [target-uuid (lib.options/uuid target-clause)
matches (for [root [:aggregation :breakout :expressions :filters :order-by]
index (range (count (get stage root)))
:let [path [root index]
clause (get-in stage path)]
:when (= (lib.options/uuid clause) target-uuid)]
path)]
(case (count matches)
1 (first matches)
0 (swap-failure-no-match stage target-clause)
(swap-failure-ambiguous target-clause matches))))

(defn- do-swap [stage source-path target-path source-clause target-clause]
(-> stage
(assoc-in source-path target-clause)
(assoc-in target-path source-clause)))

(defn swap-clauses
"Given a `query` and `stage-number`, and two clauses, swaps the position of these two clauses in a list of clauses on
this stage. Can be used to reorder clauses in the UI.
Returns the query with the two clauses exchanged.
If either clause is not found inside the same list, emits a warning and returns the query unchanged."
[query stage-number source-clause target-clause]
(let [stage (lib.util/query-stage query stage-number)
source-path (uuid-match stage source-clause)
target-path (uuid-match stage target-clause)]
(if (and source-path target-path)
(lib.util/update-query-stage query stage-number do-swap source-path target-path source-clause target-clause)
query)))
14 changes: 9 additions & 5 deletions src/metabase/types.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -356,16 +356,20 @@

(declare-assignable :type/Integer :type/Decimal)

(defn assignable?
(declare assignable?)

(defn- assignable?* [x y]
(or (isa? assignable-hierarchy x y)
(boolean (some #(assignable? x %) (descendants y)))
(boolean (some #(assignable? % y) (parents x)))))

(def assignable?
"Is a value of type `x` assignable to a variable of type `y`?
When deciding assignability, We also consider the type hierarchy.
If x is assignable to z and z is a y, then x is also assignable to y.
Also, if z is assignable to y and x is an z, then x is assignable to y."
[x y]
(or (isa? assignable-hierarchy x y)
(boolean (some #(assignable? x %) (descendants y)))
(boolean (some #(assignable? % y) (parents x)))))
(memoize assignable?*))

(defn- most-specific-common-ancestor*
"Impl for [[most-specific-common-ancestor]]."
Expand Down
114 changes: 114 additions & 0 deletions test/metabase/lib/swap_test.cljc
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
(ns metabase.lib.swap-test
(:require
[clojure.test :refer [deftest is testing]]
[clojure.test.check.clojure-test :refer [defspec]]
[clojure.test.check.generators :as gen]
[clojure.test.check.properties :as prop]
[medley.core :as m]
[metabase.lib.core :as lib]
[metabase.lib.options :as lib.options]
[metabase.lib.test-metadata :as meta]
[metabase.test.util.log :as tu.log]
#?@(:cljs ([metabase.test-runner.assert-exprs.approximately-equal]))))

#?(:cljs (comment metabase.test-runner.assert-exprs.approximately-equal/keep-me))

(defn- swapped-clauses-prop [clause-fn clause-key query]
(prop/for-all*
[(gen/elements (clause-fn query))
(gen/elements (clause-fn query))]
(fn [source-clause target-clause]
(or (= source-clause target-clause) ; Skip any case where we happened to draw the same one twice.
(let [swapped (lib/swap-clauses query -1 source-clause target-clause)]
(and ;; Correctly rearranged the clauses.
(= (for [clause (clause-fn query)]
(cond
(= source-clause clause) target-clause
(= target-clause clause) source-clause
:else clause))
(clause-fn swapped))
;; And didn't change anything else.
(= (m/dissoc-in query [:stages 0 clause-key])
(m/dissoc-in swapped [:stages 0 clause-key]))))))))

(defspec swap-clauses-on-aggregations-test-permutations
(swapped-clauses-prop
lib/aggregations :aggregation
(-> (lib/query meta/metadata-provider (meta/table-metadata :orders))
(lib/aggregate (lib/count))
(lib/aggregate (lib/sum (meta/field-metadata :orders :subtotal)))
(lib/aggregate (lib/avg (meta/field-metadata :orders :discount)))
(lib/aggregate (lib/max (meta/field-metadata :orders :tax)))
(lib/aggregate (lib/min (meta/field-metadata :orders :total))))))

(defspec swap-clauses-on-breakouts-test-permutations 300 ; Extra tests to hit the similar cases all the time!
(swapped-clauses-prop
lib/breakouts :breakout
(as-> (lib/query meta/metadata-provider (meta/table-metadata :orders)) $q
(lib/breakout $q (meta/field-metadata :products :category))
(lib/breakout $q (meta/field-metadata :people :source))
;; Deliberately including the same field three times: without binning, and with two different binning settings.
(lib/breakout $q (meta/field-metadata :orders :subtotal))
(lib/breakout $q (lib/with-binning
(meta/field-metadata :orders :subtotal)
(second (lib/available-binning-strategies $q (meta/field-metadata :orders :subtotal)))))
(lib/breakout $q (lib/with-binning
(meta/field-metadata :orders :subtotal)
(nth (lib/available-binning-strategies $q (meta/field-metadata :orders :subtotal))
2)))
;; Likewise including multiple temporal buckets.
(lib/breakout $q (lib/with-temporal-bucket (meta/field-metadata :orders :created-at) :month))
(lib/breakout $q (lib/with-temporal-bucket (meta/field-metadata :orders :created-at) :year)))))

(defspec swap-clauses-on-filters-test-permutations
(swapped-clauses-prop
lib/filters :filters
(-> (lib/query meta/metadata-provider (meta/table-metadata :orders))
(lib/filter (lib/= (meta/field-metadata :products :category) "Doohickey"))
(lib/filter (lib/< (meta/field-metadata :products :created-at) "2024-01-01T00:00:00"))
(lib/filter (lib/is-null (meta/field-metadata :orders :discount)))
(lib/filter (lib/!= (meta/field-metadata :people :source) "Facebook")))))

(defspec swap-clauses-on-expressions-test-permutations
(swapped-clauses-prop
lib/expressions :expressions
(-> (lib/query meta/metadata-provider (meta/table-metadata :orders))
(lib/expression "clean-discount" (lib/coalesce (meta/field-metadata :orders :discount) 0))
(lib/expression "discount fraction" (lib// (meta/field-metadata :orders :discount)
(meta/field-metadata :orders :subtotal)))
(lib/expression "order month" (lib/get-month (meta/field-metadata :orders :created-at)))
(lib/expression "signup year" (lib/get-year (meta/field-metadata :people :created-at))))))

(defspec swap-clauses-on-order-by-test-permutations
(swapped-clauses-prop
lib/order-bys :order-by
(-> (lib/query meta/metadata-provider (meta/table-metadata :orders))
(lib/order-by (meta/field-metadata :products :category))
(lib/order-by (meta/field-metadata :orders :subtotal) :desc)
(lib/order-by (meta/field-metadata :people :latitude) :asc)
(lib/order-by (meta/field-metadata :orders :tax) :asc)
(lib/order-by (meta/field-metadata :orders :discount) :desc))))

(deftest ^:synchronized swap-clauses-not-found-test
(testing "swap-clauses emits a warning if a clause is not found"
(let [query (-> (lib/query meta/metadata-provider (meta/table-metadata :orders))
(lib/aggregate (lib/count))
(lib/aggregate (lib/sum (meta/field-metadata :orders :subtotal)))
(lib/aggregate (lib/min (meta/field-metadata :orders :total))))
[a1 a2 _a3] (lib/aggregations query)]
(is (=? [[:warn nil #"No matching clause in swap-clauses \[:count .*"]]
(tu.log/with-log-messages-for-level ['metabase.lib.swap :warn]
(lib/swap-clauses query -1 a2 (lib.options/update-options a1 assoc :lib/uuid (str (random-uuid))))))))))

(deftest ^:synchronized swap-clauses-ambiguous-test
(testing "swap-clauses emits a warning if multiple matching clauses are found"
;; This isn't really possible to do by accident, but anyway.
(let [query (-> (lib/query meta/metadata-provider (meta/table-metadata :orders))
(lib/aggregate (lib/count))
(lib/aggregate (lib/sum (meta/field-metadata :orders :subtotal)))
(lib/aggregate (lib/min (meta/field-metadata :orders :total))))
[a1 a2 _a3] (lib/aggregations query)
query (update-in query [:stages 0 :aggregation] conj a2)]
(is (=? [[:warn nil #"Ambiguous match for clause in swap-clauses \[:sum .*"]]
(tu.log/with-log-messages-for-level ['metabase.lib.swap :warn]
(lib/swap-clauses query -1 a1 a2)))))))

0 comments on commit 796256c

Please sign in to comment.