Skip to content

Commit

Permalink
Add new :min & :max aggregation types 🍒 #1660
Browse files Browse the repository at this point in the history
  • Loading branch information
camsaul committed Mar 17, 2016
1 parent a9ba56e commit 249d718
Show file tree
Hide file tree
Showing 10 changed files with 118 additions and 34 deletions.
4 changes: 3 additions & 1 deletion frontend/src/lib/query.js
Expand Up @@ -163,7 +163,7 @@ var Query = {
},

canSortByAggregateField(query) {
var SORTABLE_AGGREGATION_TYPES = new Set(["avg", "count", "distinct", "stddev", "sum"]);
var SORTABLE_AGGREGATION_TYPES = new Set(["avg", "count", "distinct", "stddev", "sum", "min", "max"]);

return Query.hasValidBreakout(query) && SORTABLE_AGGREGATION_TYPES.has(query.aggregation[0]);
},
Expand Down Expand Up @@ -509,6 +509,8 @@ var Query = {
case "stddev": return ["Standard deviation of ", Query.getFieldName(tableMetadata, aggregation[1], options)];
case "sum": return ["Sum of ", Query.getFieldName(tableMetadata, aggregation[1], options)];
case "cum_sum": return ["Cumulative sum of ", Query.getFieldName(tableMetadata, aggregation[1], options)];
case "max": return ["Maximum of ", Query.getFieldName(tableMetadata, aggregation[1], options)];
case "min": return ["Minimum of ", Query.getFieldName(tableMetadata, aggregation[1], options)];
}
}
return "";
Expand Down
12 changes: 12 additions & 0 deletions frontend/src/lib/schema_metadata.js
Expand Up @@ -379,6 +379,18 @@ var Aggregators = [{
"validFieldsFilters": [summableFields],
"requiresField": true,
"requiredDriverFeature": "standard-deviation-aggregations"
}, {
name: "Minimum of ...",
short: "min",
description: "Minimum value of a column",
validFieldsFilters: [summableFields],
requiresField: true,
}, {
name: "Maximum of ...",
short: "max",
description: "Maximum value of a column",
validFieldsFilters: [summableFields],
requiresField: true,
}];

var BreakoutAggregator = {
Expand Down
30 changes: 28 additions & 2 deletions src/metabase/driver/druid/query_processor.clj
Expand Up @@ -97,6 +97,30 @@
:fnAggregate "function(current, x) { return current + (parseFloat(x) || 0); }"
:fnCombine "function(x, y) { return x + y; }"}))

(defn- ag:doubleMin [field]
(case (dimension-or-metric? field)
:metric {:type :doubleMin
:name :min
:fieldName (->rvalue field)}
:dimension {:type :javascript
:name :min
:fieldNames [(->rvalue field)]
:fnReset "function() { return Number.MAX_VALUE ; }"
:fnAggregate "function(current, x) { return Math.min(current, (parseFloat(x) || Number.MAX_VALUE)); }"
:fnCombine "function(x, y) { return Math.min(x, y); }"}))

(defn- ag:doubleMax [field]
(case (dimension-or-metric? field)
:metric {:type :doubleMin
:name :min
:fieldName (->rvalue field)}
:dimension {:type :javascript
:name :min
:fieldNames [(->rvalue field)]
:fnReset "function() { return Number.MIN_VALUE ; }"
:fnAggregate "function(current, x) { return Math.max(current, (parseFloat(x) || Number.MIN_VALUE)); }"
:fnCombine "function(x, y) { return Math.max(x, y); }"}))

(defn- ag:filtered [filtr aggregator] {:type :filtered, :filter filtr, :aggregator aggregator})

(defn- ag:count
Expand Down Expand Up @@ -127,7 +151,9 @@
[:distinct _] {:aggregations [{:type :cardinality
:name :count
:fieldNames [(->rvalue ag-field)]}]}
[:sum _] {:aggregations [(ag:doubleSum ag-field :sum)]})))))
[:sum _] {:aggregations [(ag:doubleSum ag-field :sum)]}
[:min _] {:aggregations [(ag:doubleMin ag-field)]}
[:max _] {:aggregations [(ag:doubleMax ag-field)]})))))


;;; ### handle-breakout
Expand Down Expand Up @@ -347,7 +373,7 @@
(defmulti ^:private handle-order-by query-type-dispatch-fn)

(defmethod handle-order-by ::query [_ _ _]
(log/warn (u/format-color 'red "Sorting with Druid is only allowed queries that have one or more breakout columns. Ignoring :order_by clause.")))
(log/warn (u/format-color 'red "Sorting with Druid is only allowed in queries that have one or more breakout columns. Ignoring :order_by clause.")))

(defmethod handle-order-by ::topN [_ {{ag-type :aggregation-type} :aggregation, [breakout-field] :breakout, [{field :field, direction :direction}] :order-by} druid-query]
(let [field (->rvalue field)
Expand Down
4 changes: 3 additions & 1 deletion src/metabase/driver/generic_sql/query_processor.clj
Expand Up @@ -104,7 +104,9 @@
:count (k/aggregate korma-form (count field) :count)
:distinct (k/aggregate korma-form (count (k/sqlfn :DISTINCT field)) :count) ; why not call it :distinct? This complicates things
:stddev (k/fields korma-form [(k/sqlfn* (sql/stddev-fn driver) field) :stddev])
:sum (k/aggregate korma-form (sum field) :sum)))))
:sum (k/aggregate korma-form (sum field) :sum)
:min (k/aggregate korma-form (min field) :min)
:max (k/aggregate korma-form (max field) :max)))))

(defn apply-breakout
"Apply a `breakout` clause to KORMA-FORM. Default implementation of `apply-breakout` for SQL drivers."
Expand Down
23 changes: 11 additions & 12 deletions src/metabase/driver/mongo/query_processor.clj
Expand Up @@ -96,6 +96,13 @@
:in `(let [~field ~(keyword (str "$$" (name field)))]
~@body)}})

;; As mentioned elsewhere for some arcane reason distinct aggregations come back named "count" and every thing else as the aggregation type
(defn- ag-type->field-name [ag-type]
(when ag-type
(if (= ag-type :distinct)
"count"
(name ag-type))))

(extend-protocol IField
Field
(->lvalue [this]
Expand All @@ -107,11 +114,7 @@
AgFieldRef
(->lvalue [_]
(let [{:keys [aggregation-type]} (:aggregation (:query *query*))]
(case aggregation-type
:avg "avg"
:count "count"
:distinct "count"
:sum "sum")))
(ag-type->field-name aggregation-type)))

DateTimeField
(->lvalue [{unit :unit, ^Field field :field}]
Expand Down Expand Up @@ -262,12 +265,6 @@

;;; ### aggregation

(def ^:private ^:const ag-type->field-name
{:avg "avg"
:count "count"
:distinct "count"
:sum "sum"})

(defn- aggregation->rvalue [{:keys [aggregation-type field]}]
(if-not field
(case aggregation-type
Expand All @@ -278,7 +275,9 @@
:then 1
:else 0}}}
:distinct {$addToSet (->rvalue field)}
:sum {$sum (->rvalue field)})))
:sum {$sum (->rvalue field)}
:min {$min (->rvalue field)}
:max {$max (->rvalue field)})))

(defn- handle-breakout+aggregation [{breakout-fields :breakout, {ag-type :aggregation-type, ag-field :field, :as aggregation} :aggregation} pipeline]
(let [aggregation? ag-type
Expand Down
4 changes: 3 additions & 1 deletion src/metabase/driver/query_processor/expand.clj
@@ -1,7 +1,7 @@
(ns metabase.driver.query-processor.expand
"Converts a Query Dict as received by the API into an *expanded* one that contains extra information that will be needed to
construct the appropriate native Query, and perform various post-processing steps such as Field ordering."
(:refer-clojure :exclude [< <= > >= = != and or not filter count distinct sum])
(:refer-clojure :exclude [< <= > >= = != and or not filter count distinct sum min max])
(:require (clojure [core :as core]
[string :as str])
[clojure.tools.logging :as log]
Expand Down Expand Up @@ -114,6 +114,8 @@
(def ^:ql ^{:arglists '([f])} distinct "Aggregation clause. Return the number of distinct values of F." (partial ag-with-field :distinct))
(def ^:ql ^{:arglists '([f])} sum "Aggregation clause. Return the sum of the values of F." (partial ag-with-field :sum))
(def ^:ql ^{:arglists '([f])} cum-sum "Aggregation clause. Return the cumulative sum of the values of F." (partial ag-with-field :cumulative-sum))
(def ^:ql ^{:arglists '([f])} min "Aggregation clause. Return the minimum value of F." (partial ag-with-field :min))
(def ^:ql ^{:arglists '([f])} max "Aggregation clause. Return the maximum value of F." (partial ag-with-field :max))

(defn ^:ql stddev
"Aggregation clause. Return the standard deviation of values of F.
Expand Down
3 changes: 2 additions & 1 deletion src/metabase/driver/query_processor/interface.clj
Expand Up @@ -209,7 +209,8 @@

(s/defrecord AggregationWithoutField [aggregation-type :- (s/eq :count)])

(s/defrecord AggregationWithField [aggregation-type :- (s/named (s/enum :avg :count :cumulative-sum :distinct :stddev :sum) "Valid aggregation type")
(s/defrecord AggregationWithField [aggregation-type :- (s/named (s/enum :avg :count :cumulative-sum :distinct :max :min :stddev :sum)
"Valid aggregation type")
field :- FieldPlaceholder])

(def Aggregation
Expand Down
3 changes: 1 addition & 2 deletions src/metabase/metabot.clj
Expand Up @@ -171,8 +171,7 @@

(defn- human-message? [{event-type :type, subtype :subtype}]
(and (= event-type "message")
(not= subtype "bot_message")
(not= subtype "message_deleted")))
(not (contains? #{"bot_message" "message_changed" "message_deleted"} subtype))))

(defn- event-timestamp-ms [{:keys [ts], :or {ts "0"}}]
(* (Double/parseDouble ts) 1000))
Expand Down
34 changes: 26 additions & 8 deletions test/metabase/driver/query_processor_test.clj
Expand Up @@ -231,14 +231,8 @@
:description nil
:extra_info {}
:target nil
:name (case ag-col-kw
:avg "avg"
:stddev "stddev"
:sum "sum")
:display_name (case ag-col-kw
:avg "avg"
:stddev "stddev"
:sum "sum")}))
:name (name ag-col-kw)
:display_name (name ag-col-kw)}))

(defn format-rows-by
"Format the values in result ROWS with the fns at the corresponding indecies in FORMAT-FNS.
Expand All @@ -247,6 +241,7 @@
(format-rows-by [int str double] [[1 1 1]]) -> [[1 \"1\" 1.0]]
By default, does't call fns on `nil` values; pass a truthy value as optional param FORMAT-NIL-VALUES? to override this behavior."
{:style/indent 1}
([format-fns rows]
(format-rows-by format-fns (not :format-nil-values?) rows))
([format-fns format-nil-values? rows]
Expand Down Expand Up @@ -1668,3 +1663,26 @@
(ql/aggregation (ql/count))
(ql/filter (ql/and (ql/not (ql/> $id 32))
(ql/contains $name "BBQ"))))))


;;; MIN & MAX

(expect-with-non-timeseries-dbs [1] (first-row (run-query venues
(ql/aggregation (ql/min $price)))))

(expect-with-non-timeseries-dbs [4] (first-row (run-query venues
(ql/aggregation (ql/max $price)))))

(expect-with-non-timeseries-dbs
[[1 34.0071] [2 33.7701] [3 10.0646] [4 33.983]]
(format-rows-by [int (partial u/round-to-decimals 4)]
(rows (run-query venues
(ql/aggregation (ql/min $latitude))
(ql/breakout $price)))))

(expect-with-non-timeseries-dbs
[[1 37.8078] [2 40.7794] [3 40.7262] [4 40.7677]]
(format-rows-by [int (partial u/round-to-decimals 4)]
(rows (run-query venues
(ql/aggregation (ql/max $latitude))
(ql/breakout $price)))))
@@ -1,7 +1,6 @@
(ns metabase.driver.event-query-processor-test
(ns metabase.driver.timeseries-query-processor-test
"Query processor tests for DBs that are event-based, like Druid.
There architecture is different enough that we can't test them along with our 'normal' DBs in `query-procesor-test`."
;; TODO - renamed this `timeseries-query-processor-test` since Druid is a "timeseries database" according to Wikipedia
(:require [expectations :refer :all]
[metabase.driver.query-processor.expand :as ql]
[metabase.driver.query-processor-test :refer [format-rows-by rows first-row]]
Expand All @@ -28,14 +27,16 @@
(datasets/with-engine-when-testing engine
(data/do-with-temp-db (flattened-db-def) (fn [& _])))))

(defmacro ^:private with-flattened-dbdef [& body]
`(data/with-temp-db [~'_ (flattened-db-def)]
~@body))

(defmacro ^:private expect-with-timeseries-dbs
{:style/indent 0}
[expected actual]
`(datasets/expect-with-engines event-based-dbs
(data/with-temp-db [~'_ (flattened-db-def)]
~expected)
(data/with-temp-db [~'_ (flattened-db-def)]
~actual)))
(with-flattened-dbdef ~expected)
(with-flattened-dbdef ~actual)))

(defn- data [results]
(when-let [data (or (:data results)
Expand Down Expand Up @@ -668,3 +669,25 @@
(expect-with-timeseries-dbs [1000] (first-row (data/run-query checkins
(ql/aggregation (ql/count)) ; test data is all in the past so nothing happened today <3
(ql/filter (ql/not (ql/time-interval $timestamp :current :day))))))



;;; MIN & MAX

(expect-with-timeseries-dbs [1.0] (first-row (data/run-query checkins
(ql/aggregation (ql/min $venue_price)))))

(expect-with-timeseries-dbs [4.0] (first-row (data/run-query checkins
(ql/aggregation (ql/max $venue_price)))))

(expect-with-timeseries-dbs
[["1" 34.0071] ["2" 33.7701] ["3" 10.0646] ["4" 33.983]] ; some sort of weird quirk w/ druid where all columns in breakout get converted to strings
(rows (data/run-query checkins
(ql/aggregation (ql/min $venue_latitude))
(ql/breakout $venue_price))))

(expect-with-timeseries-dbs
[["1" 37.8078] ["2" 40.7794] ["3" 40.7262] ["4" 40.7677]]
(rows (data/run-query checkins
(ql/aggregation (ql/max $venue_latitude))
(ql/breakout $venue_price))))

0 comments on commit 249d718

Please sign in to comment.