Skip to content

Commit

Permalink
[QP, lib] Allow multiple arguments to :contains, :starts-with, et…
Browse files Browse the repository at this point in the history
…c. (#41958)

These string matching clauses only allowed two arguments previously.
Typically `[:contains field x]` to match a field against a literal.

This adds similar desugaring for `:contains`, `:does-not-contain`,
`:starts-with` and `:ends-with` that is currently done for
multi-argument `:=` and `:!=`:

```clojure
[:contains field x y z] ;; ->
[:or [:contains field x] [:contains field y] [:contains field z]]

[:does-not-contain field x y z] ;; ->
[:and [:does-not-contain field x]
      [:does-not-contain field y]
      [:does-not-contain field z]]
```
  • Loading branch information
bshepherdson committed May 1, 2024
1 parent bdb1a02 commit d142da9
Show file tree
Hide file tree
Showing 11 changed files with 243 additions and 59 deletions.
11 changes: 9 additions & 2 deletions src/metabase/legacy_mbql/normalize.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -577,14 +577,21 @@
(into [filter-name (canonicalize-implicit-field-id first-arg)]
(map canonicalize-mbql-clause other-args)))

(doseq [clause-name [:starts-with :ends-with :contains :does-not-contain
:= :!= :< :<= :> :>=
(doseq [clause-name [:= :!= :< :<= :> :>=
:is-empty :not-empty :is-null :not-null
:between]]
(defmethod canonicalize-mbql-clause clause-name
[clause]
(canonicalize-simple-filter-clause clause)))

;; These clauses have pMBQL-style options in index 1, when they have multiple arguments.
(doseq [tag [:starts-with :ends-with :contains :does-not-contain]]
(defmethod canonicalize-mbql-clause tag
[[_tag opts & args :as clause]]
(if (> (count args) 2)
(into [tag (or opts {})] (map canonicalize-mbql-clause args))
(canonicalize-simple-filter-clause clause))))

;;; aggregations/expression subclauses

;; Remove `:rows` type aggregation (long-since deprecated; simpliy means no aggregation) if present
Expand Down
35 changes: 30 additions & 5 deletions src/metabase/legacy_mbql/schema.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -798,13 +798,38 @@
;; default true
[:case-sensitive {:optional true} :boolean]])

(defclause starts-with, field StringExpressionArg, string-or-field StringExpressionArg, options (optional StringFilterOptions))
(defclause ends-with, field StringExpressionArg, string-or-field StringExpressionArg, options (optional StringFilterOptions))
(defclause contains, field StringExpressionArg, string-or-field StringExpressionArg, options (optional StringFilterOptions))
(doseq [clause-keyword [::starts-with ::ends-with ::contains ::does-not-contain]]
(mr/def clause-keyword
[:or
;; Binary form
(helpers/clause (keyword (name clause-keyword))
"field" StringExpressionArg
"string-or-field" StringExpressionArg
"options" [:optional StringFilterOptions])
;; Multi-arg form
(helpers/clause (keyword (name clause-keyword))
"options" StringFilterOptions
"field" StringExpressionArg
"string-or-field" StringExpressionArg
"second-string-or-field" StringExpressionArg
"more-strings-or-fields" [:rest StringExpressionArg])]))

(def ^{:clause-name :starts-with} starts-with
"Schema for a valid :starts-with clause."
[:ref ::starts-with])
(def ^{:clause-name :ends-with} ends-with
"Schema for a valid :ends-with clause."
[:ref ::ends-with])
(def ^{:clause-name :contains} contains
"Schema for a valid :contains clause."
[:ref ::contains])

;; SUGAR: this is rewritten as [:not [:contains ...]]
(defclause ^:sugar does-not-contain
field StringExpressionArg, string-or-field StringExpressionArg, options (optional StringFilterOptions))
(def ^{:sugar true
:clause-name :does-not-contain}
does-not-contain
"Schema for a valid :does-not-contain clause."
[:ref ::does-not-contain])

(def ^:private TimeIntervalOptions
;; Should we include partial results for the current day/month/etc? Defaults to `false`; set this to `true` to
Expand Down
34 changes: 27 additions & 7 deletions src/metabase/legacy_mbql/util.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -234,17 +234,28 @@
[:relative-datetime n unit]]))

(defn desugar-does-not-contain
"Rewrite `:does-not-contain` filter clauses as simpler `:not` clauses."
"Rewrite `:does-not-contain` filter clauses as simpler `[:not [:contains ...]]` clauses.
Note that [[desugar-multi-argument-comparisons]] will have already desugared any 3+ argument `:does-not-contain` to
several `[:and [:does-not-contain ...] [:does-not-contain ...] ...]` clauses, which then get rewritten here into
`[:and [:not [:contains ...]] [:not [:contains ...]]]`."
[m]
(lib.util.match/replace m
[:does-not-contain & args]
[:not (into [:contains] args)]))

(defn desugar-equals-and-not-equals-with-extra-args
"`:=` and `!=` clauses with more than 2 args automatically get rewritten as compound filters.
(defn desugar-multi-argument-comparisons
"`:=`, `!=`, `:contains`, `:does-not-contain`, `:starts-with` and `:ends-with` clauses with more than 2 args
automatically get rewritten as compound filters.
[:= field x y] -> [:or [:= field x] [:= field y]]
[:!= field x y] -> [:and [:!= field x] [:!= field y]]
[:does-not-contain field x y] -> [:and [:does-not-contain field x] [:does-not-contain field y]]
[:= field x y] -> [:or [:= field x] [:= field y]]
[:!= field x y] -> [:and [:!= field x] [:!= field y]]"
Note that the optional options map is in different positions for `:contains`, `:does-not-contain`, `:starts-with` and
`:ends-with` depending on the number of arguments. 2-argument forms use the legacy style `[:contains field x opts]`.
Multi-argument forms use pMBQL style with the options at index 1, **even if there are no options**:
`[:contains {} field x y z]`."
[m]
(lib.util.match/replace m
[:= field x y & more]
Expand All @@ -253,7 +264,16 @@

[:!= field x y & more]
(apply vector :and (for [x (concat [x y] more)]
[:!= field x]))))
[:!= field x]))

[(op :guard #{:contains :does-not-contain :starts-with :ends-with})
(opts :guard map?)
field x y & more]
(let [tail (when (seq opts) [opts])]
(apply vector
(if (= op :does-not-contain) :and :or)
(for [x (concat [x y] more)]
(into [op field x] tail))))))

(defn desugar-current-relative-datetime
"Replace `relative-datetime` clauses like `[:relative-datetime :current]` with `[:relative-datetime 0 <unit>]`.
Expand Down Expand Up @@ -431,7 +451,7 @@
[filter-clause :- mbql.s/Filter]
(-> filter-clause
desugar-current-relative-datetime
desugar-equals-and-not-equals-with-extra-args
desugar-multi-argument-comparisons
desugar-does-not-contain
desugar-time-interval
desugar-is-null-and-not-null
Expand Down
22 changes: 22 additions & 0 deletions src/metabase/lib/convert.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,19 @@
{:pre [(= (count clause) 4)]}
[tag opts (->pMBQL expr) n])

;; These four expressions have a different form depending on the number of arguments.
(doseq [tag [:contains :starts-with :ends-with :does-not-contain]]
(lib.hierarchy/derive tag ::string-comparison))

(defmethod ->pMBQL ::string-comparison
[[tag opts & args :as clause]]
(if (> (count args) 2)
;; Multi-arg, pMBQL style: [tag {opts...} x y z ...]
(lib.options/ensure-uuid (into [tag opts] (map ->pMBQL args)))
;; Two-arg, legacy style: [tag x y] or [tag x y opts].
(let [[tag x y opts] clause]
(lib.options/ensure-uuid [tag (or opts {}) (->pMBQL x) (->pMBQL y)]))))

(defn legacy-query-from-inner-query
"Convert a legacy 'inner query' to a full legacy 'outer query' so you can pass it to stuff
like [[metabase.legacy-mbql.normalize/normalize]], and then probably to [[->pMBQL]]."
Expand Down Expand Up @@ -477,6 +490,15 @@
{:pre [(= (count clause) 4)]}
[tag opts (->legacy-MBQL expr) n])

(defmethod ->legacy-MBQL ::string-comparison
[[tag opts & args]]
(if (> (count args) 2)
(into [tag (disqualify opts)] (map ->legacy-MBQL args)) ; Multi-arg, pMBQL style: [tag {opts...} x y z ...]
;; Two-arg, legacy style: [tag x y] or [tag x y opts].
(let [opts (disqualify opts)]
(cond-> (into [tag] (map ->legacy-MBQL args))
(seq opts) (conj opts)))))

(defn- update-list->legacy-boolean-expression
[m pMBQL-key legacy-key]
(cond-> m
Expand Down
68 changes: 40 additions & 28 deletions src/metabase/lib/filter.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@
(doseq [tag [:and :or]]
(lib.hierarchy/derive tag ::compound))

(doseq [tag [:= :!=]]
(doseq [tag [:= :!= :starts-with :ends-with :contains :does-not-contain]]
(lib.hierarchy/derive tag ::varargs))

(doseq [tag [:< :<= :> :>= :starts-with :ends-with :contains :does-not-contain]]
(doseq [tag [:< :<= :> :>=]]
(lib.hierarchy/derive tag ::binary))

(doseq [tag [:is-null :not-null :is-empty :not-empty :not]]
Expand Down Expand Up @@ -139,55 +139,67 @@
(i18n/tru "{0} is {1} selections" (->display-name a) (count args))

[:!= _ a & args]
(i18n/tru "{0} is not {1} selections" (->display-name a) (count args)))))

(defmethod lib.metadata.calculation/display-name-method ::binary
[query stage-number expr style]
(let [->display-name #(lib.metadata.calculation/display-name query stage-number % style)
->temporal-name #(shared.ut/format-unit % nil)
temporal? #(lib.util/original-isa? % :type/Temporal)]
(lib.util.match/match-one expr
[:< _ (x :guard temporal?) (y :guard string?)]
(i18n/tru "{0} is before {1}" (->display-name x) (->temporal-name y))

[:< _ x y]
(i18n/tru "{0} is less than {1}" (->display-name x) (->display-name y))

[:<= _ x y]
(i18n/tru "{0} is less than or equal to {1}" (->display-name x) (->display-name y))

[:> _ (x :guard temporal?) (y :guard string?)]
(i18n/tru "{0} is after {1}" (->display-name x) (->temporal-name y))

[:> _ x y]
(i18n/tru "{0} is greater than {1}" (->display-name x) (->display-name y))

[:>= _ x y]
(i18n/tru "{0} is greater than or equal to {1}" (->display-name x) (->display-name y))
(i18n/tru "{0} is not {1} selections" (->display-name a) (count args))

[:starts-with _ x (y :guard string?)]
(i18n/tru "{0} starts with {1}" (->display-name x) y)

[:starts-with _ x y]
(i18n/tru "{0} starts with {1}" (->display-name x) (->display-name y))

[:starts-with _ x & args]
(i18n/tru "{0} starts with {1} selections" (->display-name x) (count args))

[:ends-with _ x (y :guard string?)]
(i18n/tru "{0} ends with {1}" (->display-name x) y)

[:ends-with _ x y]
(i18n/tru "{0} ends with {1}" (->display-name x) (->display-name y))

[:ends-with _ x & args]
(i18n/tru "{0} ends with {1} selections" (->display-name x) (count args))

[:contains _ x (y :guard string?)]
(i18n/tru "{0} contains {1}" (->display-name x) y)

[:contains _ x y]
(i18n/tru "{0} contains {1}" (->display-name x) (->display-name y))

[:contains _ x & args]
(i18n/tru "{0} contains {1} selections" (->display-name x) (count args))

[:does-not-contain _ x (y :guard string?)]
(i18n/tru "{0} does not contain {1}" (->display-name x) y)

[:does-not-contain _ x y]
(i18n/tru "{0} does not contain {1}" (->display-name x) (->display-name y)))))
(i18n/tru "{0} does not contain {1}" (->display-name x) (->display-name y))

[:does-not-contain _ x & args]
(i18n/tru "{0} does not contain {1} selections" (->display-name x) (count args)))))

(defmethod lib.metadata.calculation/display-name-method ::binary
[query stage-number expr style]
(let [->display-name #(lib.metadata.calculation/display-name query stage-number % style)
->temporal-name #(shared.ut/format-unit % nil)
temporal? #(lib.util/original-isa? % :type/Temporal)]
(lib.util.match/match-one expr
[:< _ (x :guard temporal?) (y :guard string?)]
(i18n/tru "{0} is before {1}" (->display-name x) (->temporal-name y))

[:< _ x y]
(i18n/tru "{0} is less than {1}" (->display-name x) (->display-name y))

[:<= _ x y]
(i18n/tru "{0} is less than or equal to {1}" (->display-name x) (->display-name y))

[:> _ (x :guard temporal?) (y :guard string?)]
(i18n/tru "{0} is after {1}" (->display-name x) (->temporal-name y))

[:> _ x y]
(i18n/tru "{0} is greater than {1}" (->display-name x) (->display-name y))

[:>= _ x y]
(i18n/tru "{0} is greater than or equal to {1}" (->display-name x) (->display-name y)))))

(defmethod lib.metadata.calculation/display-name-method :between
[query stage-number expr style]
Expand Down
17 changes: 8 additions & 9 deletions src/metabase/lib/schema/filter.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -79,18 +79,17 @@
(def ^:private string-filter-options
[:map [:case-sensitive {:optional true} :boolean]]) ; default true

;; binary [:ref ::expression/string] filter clauses. These also accept a `:case-sensitive` option
;; N-ary [:ref ::expression/string] filter clauses. These also accept a `:case-sensitive` option.
;; Requires at least 2 string-shaped args. If there are more than 2, `[:contains x a b]` is equivalent to
;; `[:or [:contains x a] [:contains x b]]`.
;;
;; `:does-not-contain` is sugar for `[:not [:contains ...]]`:
;;
;; [:does-not-contain ...] = [:not [:contains ...]]
;; `[:does-not-contain ...]` = `[:not [:contains ...]]`
(doseq [op [:starts-with :ends-with :contains :does-not-contain]]
(mbql-clause/define-mbql-clause op :- :type/Boolean
[:tuple
[:= {:decode/normalize common/normalize-keyword} op]
[:merge ::common/options string-filter-options]
#_whole [:ref ::expression/string]
#_part [:ref ::expression/string]]))
[:schema [:catn {:error/message (str "Valid " op " clause")}
[:tag [:= {:decode/normalize common/normalize-keyword} op]]
[:options [:merge ::common/options string-filter-options]]
[:args [:repeat {:min 2} [:schema [:ref ::expression/string]]]]]]))

(def ^:private time-interval-options
[:map [:include-current {:optional true} :boolean]]) ; default false
Expand Down
3 changes: 2 additions & 1 deletion src/metabase/lib/schema/mbql_clause.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@
return-type)
nil))

;;; TODO -- add more stuff.
;;; TODO: Support options more nicely - these don't allow for overriding the options, but we have a few cases where that
;;; is necessary. See for example the inclusion of `string-filter-options` in [[metabase.lib.filter]].

(defn catn-clause-schema
"Helper intended for use with [[define-mbql-clause]]. Create an MBQL clause schema with `:catn`. Use this for clauses
Expand Down
36 changes: 30 additions & 6 deletions test/metabase/legacy_mbql/util_test.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -375,12 +375,36 @@
(mbql.u/desugar-filter-clause [:not-empty [:field 1 {:base-type :type/DateTime}]])))))

(t/deftest ^:parallel desugar-does-not-contain-test
(t/testing "desugaring does-not-contain without options"
(t/is (= [:not [:contains [:field 1 nil] "ABC"]]
(mbql.u/desugar-filter-clause [:does-not-contain [:field 1 nil] "ABC"]))))
(t/testing "desugaring does-not-contain *with* options"
(t/is (= [:not [:contains [:field 1 nil] "ABC" {:case-sensitive false}]]
(mbql.u/desugar-filter-clause [:does-not-contain [:field 1 nil] "ABC" {:case-sensitive false}])))))
(t/testing "desugaring does-not-contain"
(t/testing "without options"
(t/is (= [:not [:contains [:field 1 nil] "ABC"]]
(mbql.u/desugar-filter-clause [:does-not-contain [:field 1 nil] "ABC"]))))
(t/testing "*with* options"
(t/is (= [:not [:contains [:field 1 nil] "ABC" {:case-sensitive false}]]
(mbql.u/desugar-filter-clause [:does-not-contain [:field 1 nil] "ABC" {:case-sensitive false}]))))
(t/testing "desugaring does-not-contain with multiple arguments"
(t/testing "without options"
(t/is (= [:and
[:not [:contains [:field 1 nil] "ABC"]]
[:not [:contains [:field 1 nil] "XYZ"]]]
(mbql.u/desugar-filter-clause [:does-not-contain {} [:field 1 nil] "ABC" "XYZ"])))
(t/is (= [:and
[:not [:contains [:field 1 nil] "ABC"]]
[:not [:contains [:field 1 nil] "XYZ"]]
[:not [:contains [:field 1 nil] "LMN"]]]
(mbql.u/desugar-filter-clause [:does-not-contain {} [:field 1 nil] "ABC" "XYZ" "LMN"]))))
(t/testing "*with* options"
(t/is (= [:and
[:not [:contains [:field 1 nil] "ABC" {:case-sensitive false}]]
[:not [:contains [:field 1 nil] "XYZ" {:case-sensitive false}]]]
(mbql.u/desugar-filter-clause
[:does-not-contain {:case-sensitive false} [:field 1 nil] "ABC" "XYZ"])))
(t/is (= [:and
[:not [:contains [:field 1 nil] "ABC" {:case-sensitive false}]]
[:not [:contains [:field 1 nil] "XYZ" {:case-sensitive false}]]
[:not [:contains [:field 1 nil] "LMN" {:case-sensitive false}]]]
(mbql.u/desugar-filter-clause
[:does-not-contain {:case-sensitive false} [:field 1 nil] "ABC" "XYZ" "LMN"])))))))

(t/deftest ^:parallel desugar-temporal-extract-test
(t/testing "desugaring :get-year, :get-month, etc"
Expand Down
Loading

0 comments on commit d142da9

Please sign in to comment.