This repository has been archived by the owner on Aug 11, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
schema.clj
973 lines (813 loc) · 44.6 KB
/
schema.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
(ns metabase.mbql.schema
"Schema for validating a *normalized* MBQL query. This is also the definitive grammar for MBQL, wow!"
(:refer-clojure
:exclude
[count distinct min max + - / * and or not = < > <= >= time])
(:require [clojure
[core :as core]
[set :as set]]
[metabase.mbql.schema.helpers :refer [defclause is-clause? one-of]]
[metabase.mbql.util.match :as match]
[metabase.util.schema :as su]
[schema.core :as s])
(:import java.time.format.DateTimeFormatter))
;; A NOTE ABOUT METADATA:
;;
;; Clauses below are marked with the following tags for documentation purposes:
;;
;; * Clauses marked `^:sugar` are syntactic sugar primarily intended to make generating queries easier on the
;; frontend. These clauses are automatically rewritten as simpler clauses by the `desugar` or `expand-macros`
;; middleware. Thus driver implementations do not need to handle these clauses.
;;
;; * Clauses marked `^:internal` are automatically generated by `wrap-value-literals` or other middleware from values
;; passed in. They are not intended to be used by the frontend when generating a query. These add certain
;; information that simplify driver implementations. When writing MBQL queries yourself you should pretend these
;; clauses don't exist.
;;
;; * Clauses marked `^{:requires-features #{feature+}}` require a certain set of features to be used. At some date in
;; the future we will likely add middleware that uses this metadata to automatically validate that a driver has the
;; features needed to run the query in question.
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | MBQL Clauses |
;;; +----------------------------------------------------------------------------------------------------------------+
;;; ------------------------------------------------- Datetime Stuff -------------------------------------------------
;; Every unit a datetime value can be bucketed by, grouped below by granularity.
(def DatetimeFieldUnit
  "Schema for all valid datetime bucketing units."
  (-> (s/enum :default
              :minute :minute-of-hour
              :hour :hour-of-day
              :day :day-of-week :day-of-month :day-of-year
              :week :week-of-year
              :month :month-of-year
              :quarter :quarter-of-year
              :year)
      (s/named "datetime-unit")))
;; Units accepted by `relative-datetime`, `interval` and `time-interval`. Unlike `DatetimeFieldUnit` there are no
;; extraction-style units such as `:day-of-week` in this set.
(def ^:private RelativeDatetimeUnit
  (-> (s/enum :default :minute :hour :day :week :month :quarter :year)
      (s/named "relative-datetime-unit")))
(defn- can-parse-iso-8601?
  "Can string `s` be parsed by `formatter`?

  Returns `true` when `formatter` parses `s`, `false` when parsing throws, and `nil` when `s` is not a string at
  all."
  [^DateTimeFormatter formatter, ^String s]
  (when (string? s)
    (try
      (.parse formatter s)
      true
      ;; Only parse failures should count as \"cannot parse\". Catching `Throwable` would also swallow JVM `Error`s
      ;; (e.g. `OutOfMemoryError`), hiding real problems behind a schema validation failure.
      (catch Exception _
        false))))
;; Single-argument predicates, one per `DateTimeFormatter` flavor; each simply delegates to `can-parse-iso-8601?`.
(def ^:private ^{:arglists '([s])} can-parse-date?
  #(can-parse-iso-8601? DateTimeFormatter/ISO_DATE %))

(def ^:private ^{:arglists '([s])} can-parse-datetime?
  #(can-parse-iso-8601? DateTimeFormatter/ISO_DATE_TIME %))

(def ^:private ^{:arglists '([s])} can-parse-time?
  #(can-parse-iso-8601? DateTimeFormatter/ISO_TIME %))
(def LiteralDateString
  "Schema for an ISO-8601-formatted date string literal."
  ;; The validation message previously said \"datetime\", which mislabelled failures of this date-only schema.
  (s/constrained su/NonBlankString can-parse-date? "valid ISO-8601 date string literal"))
(def LiteralDatetimeString
  "Schema for an ISO-8601-formatted datetime string literal."
  ;; Validated with `can-parse-datetime?`, i.e. `DateTimeFormatter/ISO_DATE_TIME`.
  (s/constrained su/NonBlankString can-parse-datetime? "valid ISO-8601 datetime string literal"))
(def LiteralTimeString
  "Schema for an ISO-8601-formatted time string literal."
  ;; A non-blank string that additionally satisfies `can-parse-time?`.
  (-> su/NonBlankString
      (s/constrained can-parse-time? "valid ISO-8601 time string literal")))
(def TemporalLiteralString
  "Schema for either a literal datetime string, literal date string, or a literal time string."
  ;; Branches are tried top to bottom: datetime first, then date, then time. There is no `:else` branch.
  (-> (s/conditional
       can-parse-datetime? LiteralDatetimeString
       can-parse-date?     LiteralDateString
       can-parse-time?     LiteralTimeString)
      (s/named "valid ISO-8601 datetime, date, or time string literal")))
;; TODO - `unit` is not allowed if `n` is `current`
;; `relative-datetime`: `n` is either the keyword `:current` or an integer; `unit` is declared `optional`.
(defclause relative-datetime
n (s/cond-pre (s/eq :current) s/Int)
unit (optional RelativeDatetimeUnit))
;; `interval`: `n` must be an integer (`:current` is not accepted here) and `unit` is not marked `optional`.
(defclause interval
n s/Int
unit RelativeDatetimeUnit)
;; This clause is automatically generated by middleware when datetime literals (literal strings or one of the Java
;; types) are encountered. Unit is inferred by looking at the Field the timestamp is compared against. Implemented
;; mostly for the convenience of driver implementations. You don't need to use this form directly when writing MBQL; datetime
;; literal strings are preferred instead.
;;
;; example:
;; [:= [:datetime-field [:field-id 10] :day] "2018-10-02"]
;;
;; becomes:
;; [:= [:datetime-field [:field-id 10] :day] [:absolute-datetime #inst "2018-10-02" :day]]
;; `timestamp` must be a `java.sql.Timestamp`; `unit` may be any `DatetimeFieldUnit`.
(defclause ^:internal absolute-datetime
timestamp java.sql.Timestamp
unit DatetimeFieldUnit)
;; it could make sense to say hour-of-day(field) = hour-of-day("2018-10-10T12:00")
;; but it does not make sense to say month-of-year(field) = month-of-year("08:00:00"),
;; does it? So we'll restrict the set of units a TimeValue can have to ones that have no notion of day/date.
(def TimeUnit
  "Valid unit for time bucketing."
  ;; Only units with no notion of day/date are listed.
  (s/enum :default :minute :minute-of-hour :hour :hour-of-day))
;; almost exactly the same as `absolute-datetime`, but generated in some situations where the literal in question was
;; clearly a time (e.g. "08:00:00.000") and/or the Field derived from `:type/Time` and/or the unit was a
;; time-bucketing unit
;; `time` must be a `java.sql.Time`; `unit` is restricted to `TimeUnit`.
;;
;; The metadata tag used to be misspelled `^:internval`, so this clause was not actually marked `:internal` like
;; `absolute-datetime` and `value` are.
(defclause ^:internal time
  time java.sql.Time
  unit TimeUnit)
(def ^:private DatetimeLiteral
  "Schema for valid absolute datetime literals."
  (s/conditional
   #(is-clause? :absolute-datetime %) absolute-datetime
   #(is-clause? :time %)              time
   :else
   ;; literal datetime strings and Java types will get transformed to `absolute-datetime` clauses automatically by
   ;; middleware so drivers don't need to deal with these directly. You only need to worry about handling
   ;; `absolute-datetime` clauses.
   (s/cond-pre TemporalLiteralString java.util.Date)))
(def DateTimeValue
  "Schema for a datetime value drivers will personally have to handle: an `absolute-datetime` form, a
  `relative-datetime` form, or a `time` form."
  ;; The docstring previously mentioned only two of the three alternatives accepted here.
  (one-of absolute-datetime relative-datetime time))
;;; -------------------------------------------------- Other Values --------------------------------------------------
(def ValueTypeInfo
  "Type info about a value in a `:value` clause. Added automatically by `wrap-value-literals` middleware to values in
  filter clauses based on the Field in the clause."
  ;; Every key is optional, and every value may be `nil`.
  {(s/optional-key :database_type)
   (s/maybe su/NonBlankString)

   (s/optional-key :base_type)
   (s/maybe su/FieldType)

   (s/optional-key :special_type)
   (s/maybe su/FieldType)

   (s/optional-key :unit)
   (s/maybe DatetimeFieldUnit)})
;; Arguments to filter clauses are automatically replaced with [:value <value> <type-info>] clauses by the
;; `wrap-value-literals` middleware. This is done to make it easier to implement query processors, because most driver
;; implementations dispatch off of Object type, which is often not enough to make informed decisions about how to
;; treat certain objects. For example, a string compared against a Postgres UUID Field needs to be parsed into a UUID
;; object, since text <-> UUID comparison doesn't work in Postgres. For this reason, raw literals in `:filter`
;; clauses are wrapped in `:value` clauses and given information about the type of the Field they will be compared to.
;; The wrapped `value` itself is unconstrained (`s/Any`); `type-info` may be `nil` or a `ValueTypeInfo` map.
(defclause ^:internal value
value s/Any
type-info (s/maybe ValueTypeInfo))
;;; ----------------------------------------------------- Fields -----------------------------------------------------
;; Normal lowest-level Field clauses refer to a Field either by ID or by name
;; `:field-id` refers to a Field by ID (validated with `su/IntGreaterThanZero`).
(defclause field-id, id su/IntGreaterThanZero)
;; `:field-literal` refers to a Field by a non-blank name, and also carries the Field's type.
(defclause field-literal, field-name su/NonBlankString, field-type su/FieldType)
;; `:joined-field` pairs a non-blank join alias with either a `:field-id` or a `:field-literal` clause.
(defclause joined-field, alias su/NonBlankString, field (one-of field-id field-literal))
;; Both args in `[:fk-> <source-field> <dest-field>]` are implicit `:field-ids`. E.g.
;;
;; [:fk-> 10 20] --[NORMALIZE]--> [:fk-> [:field-id 10] [:field-id 20]]
;;
;; `fk->` clauses are automatically replaced by the Query Processor with appropriate `:joined-field` clauses during
;; preprocessing. Drivers do not need to handle `:fk->` clauses themselves.
;; After normalization both args must be either a `:field-id` or a `:field-literal` clause.
(defclause ^{:requires-features #{:foreign-keys}} ^:sugar fk->
source-field (one-of field-id field-literal)
dest-field (one-of field-id field-literal))
;; Expression *references* refer to something in the `:expressions` clause, e.g. something like `[:+ [:field-id 1]
;; [:field-id 2]]`
;; The single arg is the (non-blank) name of the expression being referenced.
(defclause ^{:requires-features #{:expressions}} expression
expression-name su/NonBlankString)
;; `datetime-field` is used to specify DATE BUCKETING for a Field that represents a moment in time of some sort. There
;; is no requirement that all `:type/DateTime` derived Fields be wrapped in `datetime-field`, but for legacy reasons
;; `:field-id` clauses that refer to datetime Fields will be automatically "bucketed" in the `:breakout` and `:filter`
;; clauses, but nowhere else. Auto-bucketing only applies to `:filter` clauses when values for comparison are
;; `yyyy-MM-dd` date strings. See `auto-bucket-datetimes` for more details. `:field-id` clauses elsewhere will not be
;; automatically bucketed, so drivers still need to make sure they do any special datetime handling for plain
;; `:field-id` clauses when their Field derives from `:type/DateTime`.
;;
;; Datetime Field can wrap any of the lowest-level Field clauses, but not other datetime-field clauses, because that
;; wouldn't make sense. They similarly can not wrap expression references, because doing arithmetic on timestamps
;; doesn't make a whole lot of sense (what does `"2018-10-23"::timestamp / 2` mean?).
;;
;; Field is an implicit Field ID
;; The wrapped `field` is limited to `:field-id`, `:field-literal`, `:fk->` and `:joined-field`; `:expression`
;; references and other `:datetime-field` clauses are not in the list.
(defclause datetime-field
field (one-of field-id field-literal fk-> joined-field)
unit DatetimeFieldUnit)
;; binning strategy can wrap any of the above clauses, but again, not another binning strategy clause
(def BinningStrategyName
  "Schema for a valid value for the `strategy-name` param of a `binning-strategy` clause."
  (apply s/enum [:num-bins :bin-width :default]))
(def BinnableField
  "Schema for any sort of field clause that can be wrapped by a `binning-strategy` clause."
  ;; Neither `expression` nor `binning-strategy` itself appears in this list.
  (one-of field-id
          field-literal
          joined-field
          fk->
          datetime-field))
(def ResolvedBinningStrategyOptions
  "Schema for map of options tacked on to the end of `binning-strategy` clauses by the `binning` middleware."
  ;; All four keys are required. `:bin-width` may be zero but not negative.
  (let [not-negative? #(not (neg? %))]
    {:num-bins  su/IntGreaterThanZero
     :bin-width (s/constrained s/Num not-negative? "bin width must be >= 0.")
     :min-value s/Num
     :max-value s/Num}))
;; TODO - binning strategy param is disallowed for `:default` and required for the others. For `num-bins` it must also
;; be an integer.
;; `strategy-param` and `resolved-options` are both declared `optional`; `strategy-param`, when given, must be a
;; non-negative number.
(defclause ^{:requires-features #{:binning}} binning-strategy
field BinnableField
strategy-name BinningStrategyName
strategy-param (optional (s/constrained s/Num (complement neg?) "strategy param must be >= 0."))
;; These are added in automatically by the `binning` middleware. Don't add them yourself, as they'll just be
;; replaced. Driver implementations can rely on this being populated.
resolved-options (optional ResolvedBinningStrategyOptions))
;; The concrete list of Field-like clauses. `Field` below refers to this through its var (`#'Field*`) so the schema
;; can be used recursively.
(def ^:private Field*
  (one-of field-id
          field-literal
          joined-field
          fk->
          datetime-field
          expression
          binning-strategy))

(def Field
  "Schema for anything that refers to a Field, from the common `[:field-id <id>]` to variants like `:datetime-field` or
  `:fk->` or an expression reference `[:expression <name>]`."
  (s/recursive #'Field*))
;; aggregate field reference refers to an aggregation, e.g.
;;
;; {:aggregation [[:count]]
;; :order-by [[:asc [:aggregation 0]]]} ;; refers to the 0th aggregation, `:count`
;;
;; Currently aggregate Field references can only be used inside order-by clauses. In the future once we support SQL
;; `HAVING` we can allow them in filter clauses too
;;
;; TODO - shouldn't we allow composing aggregations in expressions? e.g.
;;
;; {:order-by [[:asc [:+ [:aggregation 0] [:aggregation 1]]]]}
;;
;; TODO - it would be nice if we could check that there's actually an aggregation with the corresponding index,
;; wouldn't it
;; The index is only checked to be an integer (`s/Int`); this schema does not reject negative or out-of-range values.
(defclause aggregation, aggregation-clause-index s/Int)
(def FieldOrAggregationReference
  "Schema for any type of valid Field clause, or for an indexed reference to an aggregation clause."
  (s/conditional
   #(is-clause? :aggregation %) aggregation
   :else                        Field))
;;; -------------------------------------------------- Expressions ---------------------------------------------------
;; Expressions are "calculated column" definitions, defined once and then used elsewhere in the MBQL query.
;; Forward declaration: `ExpressionArg` needs the `ArithmeticExpression` var before it is defined further down.
(declare ArithmeticExpression)

;; A single operand of an arithmetic clause: a number, a nested arithmetic clause, or (failing both) a Field.
(def ^:private ExpressionArg
  (s/conditional
   number?                        s/Num
   #(is-clause? #{:+ :- :/ :*} %) (s/recursive #'ArithmeticExpression)
   :else                          Field))
;; Same as `ExpressionArg`, except that an `:interval` clause is accepted as well.
(def ^:private ExpressionArgOrInterval
  (s/conditional
   #(is-clause? :interval %) interval
   :else                     ExpressionArg))
;; Arithmetic clauses. Each one declares two operands (`x`, `y`) plus `more`, which is declared with `rest`
;; (presumably any number of further operands). Only `+` accepts an `:interval` clause, and only as `y`.
(defclause ^{:requires-features #{:expressions}} +
x ExpressionArg, y ExpressionArgOrInterval, more (rest ExpressionArg))
(defclause ^{:requires-features #{:expressions}} -, x ExpressionArg, y ExpressionArg, more (rest ExpressionArg))
(defclause ^{:requires-features #{:expressions}} /, x ExpressionArg, y ExpressionArg, more (rest ExpressionArg))
(defclause ^{:requires-features #{:expressions}} *, x ExpressionArg, y ExpressionArg, more (rest ExpressionArg))
;; Any one of the four arithmetic clauses.
(def ^:private ArithmeticExpression*
  (one-of + - / *))

(def ^:private ArithmeticExpression
  "Schema for the definition of an arithmetic expression."
  ;; Goes through the var so that arithmetic clauses can nest inside one another.
  (s/recursive #'ArithmeticExpression*))
(def FieldOrExpressionDef
  "Schema for anything that is accepted as a top-level expression definition, either an arithmetic expression such as a
  `:+` clause or a Field clause such as `:field-id`."
  (s/conditional
   #(is-clause? #{:+ :- :* :/} %) ArithmeticExpression
   :else                          Field))
;;; ----------------------------------------------------- Filter -----------------------------------------------------
;; `Filter` is defined further down; it is forward-declared here so the compound clauses below can refer to it through
;; `(s/recursive #'Filter)`.
(declare Filter)
;; `and` / `or` declare two subclauses plus `other-clauses`, which is declared with `rest`.
(defclause and
first-clause (s/recursive #'Filter)
second-clause (s/recursive #'Filter)
other-clauses (rest (s/recursive #'Filter)))
(defclause or
first-clause (s/recursive #'Filter)
second-clause (s/recursive #'Filter)
other-clauses (rest (s/recursive #'Filter)))
;; `not` wraps a single Filter.
(defclause not, clause (s/recursive #'Filter))
;; Either a `:relative-datetime` clause or, for anything else, a Field.
(def ^:private FieldOrRelativeDatetime
  (s/conditional
   #(is-clause? :relative-datetime %) relative-datetime
   :else                              Field))
(def ^:private EqualityComparible
  "Schema for things that make sense in a `=` or `!=` filter, i.e. things that can be compared for equality."
  ;; Wrapped in `s/maybe`, so `nil` is accepted as well.
  (s/maybe
   (s/cond-pre
    s/Bool
    s/Num
    s/Str
    DatetimeLiteral
    FieldOrRelativeDatetime
    value)))
(def ^:private OrderComparible
  "Schema for things that make sense in a filter like `>` or `<`, i.e. things that can be sorted."
  ;; Unlike `EqualityComparible`, neither booleans nor `nil` are accepted here.
  (s/conditional
   #(is-clause? :value %)
   value

   :else
   (s/cond-pre s/Num
               s/Str
               DatetimeLiteral
               FieldOrRelativeDatetime)))
;; For all of the non-compound Filter clauses below the first arg is an implicit Field ID
;; These are SORT OF SUGARY, because extra values will automatically be converted to compound clauses. Driver
;; implementations only need to handle the 2-arg forms.
;;
;; `=` works like SQL `IN` with more than 2 args
;; [:= [:field-id 1] 2 3] --[DESUGAR]--> [:or [:= [:field-id 1] 2] [:= [:field-id 1] 3]]
;;
;; `!=` works like SQL `NOT IN` with more than 2 args
;; [:!= [:field-id 1] 2 3] --[DESUGAR]--> [:and [:!= [:field-id 1] 2] [:!= [:field-id 1] 3]]
;; `=` and `!=` accept further values through `more-values-or-fields` (declared with `rest`) and compare against
;; `EqualityComparible` things.
(defclause =, field Field, value-or-field EqualityComparible, more-values-or-fields (rest EqualityComparible))
(defclause !=, field Field, value-or-field EqualityComparible, more-values-or-fields (rest EqualityComparible))
;; The ordering comparisons declare exactly one `OrderComparible` value after the Field.
(defclause <, field Field, value-or-field OrderComparible)
(defclause >, field Field, value-or-field OrderComparible)
(defclause <=, field Field, value-or-field OrderComparible)
(defclause >=, field Field, value-or-field OrderComparible)
;; `between` declares a `min` and a `max` after the Field.
(defclause between field Field, min OrderComparible, max OrderComparible)
;; SUGAR CLAUSE: This is automatically rewritten as a pair of `:between` clauses by the `:desugar` middleware.
;; Note the order of the four bounds after the two Fields: lat-max, lon-min, lat-min, lon-max.
(defclause ^:sugar inside
lat-field Field
lon-field Field
lat-max OrderComparible
lon-min OrderComparible
lat-min OrderComparible
lon-max OrderComparible)
;; SUGAR CLAUSES: These are rewritten as `[:= <field> nil]` and `[:!= <field> nil]` respectively
;; Both clauses take a single Field and nothing else.
(defclause ^:sugar is-null, field Field)
(defclause ^:sugar not-null, field Field)
;; Options map accepted by the string filter clauses.
(def ^:private StringFilterOptions
  {(s/optional-key :case-sensitive)
   s/Bool}) ; default true
;; What a string filter can compare its Field against: a string, another Field, or a `:value` clause.
(def ^:private StringOrField
  (s/cond-pre s/Str Field value))
;; The three string filters share one signature: a Field, a `StringOrField`, and an `optional` options map.
(defclause starts-with, field Field, string-or-field StringOrField, options (optional StringFilterOptions))
(defclause ends-with, field Field, string-or-field StringOrField, options (optional StringFilterOptions))
(defclause contains, field Field, string-or-field StringOrField, options (optional StringFilterOptions))
;; SUGAR: this is rewritten as [:not [:contains ...]]
;; Takes the same three args as `contains`.
(defclause ^:sugar does-not-contain
field Field, string-or-field StringOrField, options (optional StringFilterOptions))
;; Options map accepted by `time-interval`.
(def ^:private TimeIntervalOptions
  {;; Should we include partial results for the current day/month/etc? Defaults to `false`; set this to `true` to
   ;; include them.
   (s/optional-key :include-current)
   s/Bool}) ; default false
;; Filter subclause. Syntactic sugar for specifying a specific time interval.
;;
;; Return rows where datetime Field 100's value is in the current month
;;
;; [:time-interval [:field-id 100] :current :month]
;;
;; Return rows where datetime Field 100's value is in the current month, including partial results for the
;; current day
;;
;; [:time-interval [:field-id 100] :current :month {:include-current true}]
;;
;; SUGAR: This is automatically rewritten as a filter clause with a relative-datetime value
;; `field` is limited to the lowest-level Field clauses listed below. `n` is either an integer or one of the keywords
;; `:current`, `:last`, `:next`. `options` is declared `optional`.
(defclause ^:sugar time-interval
field (one-of field-id fk-> field-literal joined-field)
n (s/cond-pre
s/Int
(s/enum :current :last :next))
unit RelativeDatetimeUnit
options (optional TimeIntervalOptions))
;; A segment is a special `macro` that saves some pre-defined filter clause, e.g. [:segment 1]
;; this gets replaced by a normal Filter clause in MBQL macroexpansion
;;
;; It can also be used for GA, which looks something like `[:segment "gaid::-11"]`. GA segments aren't actually MBQL
;; segments and pass-thru to GA.
;; `segment-id` may be either an integer ID (`su/IntGreaterThanZero`) or a non-blank string.
(defclause ^:sugar segment, segment-id (s/cond-pre su/IntGreaterThanZero su/NonBlankString))
;; The full list of filter clauses. `Filter` refers to this through its var so that compound clauses can nest.
(def ^:private Filter*
  (one-of
   ;; filters drivers must implement
   and or not
   = != < > <= >=
   between
   starts-with ends-with contains
   ;; SUGAR filters drivers do not need to implement
   does-not-contain
   inside
   is-null not-null
   time-interval
   segment))

(def Filter
  "Schema for a valid MBQL `:filter` clause."
  (s/recursive #'Filter*))
;;; -------------------------------------------------- Aggregations --------------------------------------------------
;; For all of the 'normal' Aggregations below (excluding Metrics) fields are implicit Field IDs
;; cum-sum and cum-count are SUGAR because they're implemented in middleware. These clauses are swapped out with
;; `sum` and `count` aggregations respectively and summation is done in Clojure-land
;; Both clauses take a single Field, declared `optional`.
;; NOTE(review): the `^:sugar` tag is on `count` and `cum-count` here, while `cum-sum` (defined with the other
;; aggregations) carries no `^:sugar` tag — confirm whether `count` is really meant to be tagged and `cum-sum` not.
(defclause ^{:requires-features #{:basic-aggregations}} ^:sugar count, field (optional Field))
(defclause ^{:requires-features #{:basic-aggregations}} ^:sugar cum-count, field (optional Field))
;; technically aggregations besides count can also accept expressions as args, e.g.
;;
;; [[:sum [:+ [:field-id 1] [:field-id 2]]]]
;;
;; Which is equivalent to SQL:
;;
;; SUM(field_1 + field_2)
;; Each of these takes exactly one arg: either a Field or an arithmetic expression (`FieldOrExpressionDef`).
(defclause ^{:requires-features #{:basic-aggregations}} avg, field-or-expression FieldOrExpressionDef)
(defclause ^{:requires-features #{:basic-aggregations}} cum-sum, field-or-expression FieldOrExpressionDef)
(defclause ^{:requires-features #{:basic-aggregations}} distinct, field-or-expression FieldOrExpressionDef)
(defclause ^{:requires-features #{:basic-aggregations}} sum, field-or-expression FieldOrExpressionDef)
(defclause ^{:requires-features #{:basic-aggregations}} min, field-or-expression FieldOrExpressionDef)
(defclause ^{:requires-features #{:basic-aggregations}} max, field-or-expression FieldOrExpressionDef)
;; `sum-where` takes a Field/expression plus a Filter predicate; `count-where` and `share` take only the predicate.
(defclause ^{:requires-features #{:basic-aggregations}} sum-where
field-or-expression FieldOrExpressionDef, pred Filter)
(defclause ^{:requires-features #{:basic-aggregations}} count-where
pred Filter)
(defclause ^{:requires-features #{:basic-aggregations}} share
pred Filter)
;; `stddev` is the only aggregation here that requires `:standard-deviation-aggregations` rather than
;; `:basic-aggregations`.
(defclause ^{:requires-features #{:standard-deviation-aggregations}} stddev
field-or-expression FieldOrExpressionDef)
;; Metrics are just 'macros' (placeholders for other aggregations with optional filter and breakout clauses) that get
;; expanded to other aggregations/etc. in the expand-macros middleware
;;
;; METRICS WITH STRING IDS, e.g. `[:metric "ga:sessions"]`, are Google Analytics metrics, not Metabase metrics! They
;; pass straight thru to the GA query processor.
;; `metric-id` may be either an integer ID (`su/IntGreaterThanZero`) or a non-blank string.
(defclause ^:sugar metric, metric-id (s/cond-pre su/IntGreaterThanZero su/NonBlankString))
;; the following are definitions for expression aggregations, e.g. [:+ [:sum [:field-id 10]] [:sum [:field-id 20]]]
;; Forward declaration: `Aggregation` is defined after the clauses that need to refer to it.
(declare Aggregation)

;; An operand of an aggregation-arithmetic clause: a plain number, or otherwise any Aggregation.
(def ^:private ExpressionAggregationArg
  (s/conditional
   number? s/Num
   :else   (s/recursive #'Aggregation)))
;; Arithmetic over aggregations. Each clause is declared with a two-element vector instead of a bare symbol;
;; presumably the first element is the name defined in this namespace and the second is the clause name, which keeps
;; these from colliding with the expression clauses `+ - * /` — TODO confirm against `defclause`.
(defclause [^{:requires-features #{:expression-aggregations}} ag:+ +]
x ExpressionAggregationArg, y ExpressionAggregationArg, more (rest ExpressionAggregationArg))
(defclause [^{:requires-features #{:expression-aggregations}} ag:- -]
x ExpressionAggregationArg, y ExpressionAggregationArg, more (rest ExpressionAggregationArg))
(defclause [^{:requires-features #{:expression-aggregations}} ag:* *]
x ExpressionAggregationArg, y ExpressionAggregationArg, more (rest ExpressionAggregationArg))
(defclause [^{:requires-features #{:expression-aggregations}} ag:div /]
x ExpressionAggregationArg, y ExpressionAggregationArg, more (rest ExpressionAggregationArg))
;; ag:/ isn't a valid token
;; Every aggregation clause except the `aggregation-options` wrapper.
(def ^:private UnnamedAggregation*
  (one-of count avg cum-count cum-sum distinct stddev sum min max
          ag:+ ag:- ag:* ag:div
          metric
          share count-where sum-where))

;; Referenced through the var so aggregation-arithmetic clauses can contain other aggregations.
(def ^:private UnnamedAggregation
  (s/recursive #'UnnamedAggregation*))
(def AggregationOptions
  "Additional options for any aggregation clause when wrapping it in `:aggregation-options`."
  {;; name to use for this aggregation in the native query instead of the default name (e.g. `count`)
   (s/optional-key :name)
   su/NonBlankString

   ;; user-facing display name for this aggregation instead of the default one
   (s/optional-key :display-name)
   su/NonBlankString})
;; Wraps an unnamed aggregation together with an `AggregationOptions` map. Since the first arg is an
;; `UnnamedAggregation`, an `aggregation-options` clause cannot directly wrap another one.
(defclause aggregation-options
aggregation UnnamedAggregation
options AggregationOptions)
(def Aggregation
  "Schema for anything that is a valid `:aggregation` clause."
  (s/conditional
   #(is-clause? :aggregation-options %) aggregation-options
   :else                                UnnamedAggregation))
;;; ---------------------------------------------------- Order-By ----------------------------------------------------
;; order-by is just a series of `[<direction> <field>]` clauses like
;;
;; {:order-by [[:asc [:field-id 1]], [:desc [:field-id 2]]]}
;;
;; Field ID is implicit in these clauses
;; Both directions accept either a Field clause or an `[:aggregation <index>]` reference.
(defclause asc, field FieldOrAggregationReference)
(defclause desc, field FieldOrAggregationReference)
(def OrderBy
  "Schema for an `order-by` clause subclause."
  ;; Either an `asc` or a `desc` clause.
  (one-of asc
          desc))
;;; +----------------------------------------------------------------------------------------------------------------+
;;; | Queries |
;;; +----------------------------------------------------------------------------------------------------------------+
;;; ---------------------------------------------- Native [Inner] Query ----------------------------------------------
;; TODO - schemas for template tags and dimensions live in `metabase.query-processor.middleware.parameters.sql`. Move
;; them here when we get the chance.
;; s/Any for now until we move over the stuff from the parameters middleware
(def ^:private TemplateTag s/Any)
(def NativeQuery
  "Schema for a valid, normalized native [inner] query."
  {;; the native query itself; its shape is not constrained by this schema
   :query
   s/Any

   ;; map of non-blank tag name -> template tag
   (s/optional-key :template-tags)
   {su/NonBlankString TemplateTag}

   ;; collection (table) this query should run against. Needed for MongoDB
   (s/optional-key :collection)
   (s/maybe su/NonBlankString)

   ;; other stuff gets added in by different bits of QP middleware to record bits of state or pass info around.
   ;; Everyone else can ignore them.
   s/Keyword
   s/Any})
;;; ----------------------------------------------- MBQL [Inner] Query -----------------------------------------------
;; Forward declarations for schemas that are defined later in the file.
(declare Query MBQLQuery)

(def SourceQuery
  "Schema for a valid value for a `:source-query` clause."
  (s/conditional
   ;; a map with a truthy `:native` key is treated as a native source query...
   (fn [x]
     (and (map? x) (:native x)))
   ;; when using native queries as source queries the schema is exactly the same except use `:native` in place of
   ;; `:query` for reasons I do not fully remember (perhaps to make it easier to differentiate them from MBQL source
   ;; queries).
   (set/rename-keys NativeQuery {:query :native})

   ;; ...and everything else must be an MBQL query
   :else
   (s/recursive #'MBQLQuery)))
(def SourceQueryMetadata
  "Schema for the expected keys for a single column in `:source-metadata` (`:source-metadata` is a sequence of these
  entries), if it is passed in to the query.
  This metadata automatically gets added for all source queries that are referenced via the `card__id` `:source-table`
  form; for explicit `:source-query`s you should usually include this information yourself when specifying explicit
  `:source-query`s."
  ;; TODO - there is a very similar schema in `metabase.sync.analyze.query-results`; see if we can merge them
  {:name
   su/NonBlankString

   :base_type
   su/FieldType

   ;; this is only used by the annotate post-processing stage, not really needed at all for pre-processing, might be
   ;; able to remove this as a requirement
   :display_name
   su/NonBlankString

   (s/optional-key :special_type)
   (s/maybe su/FieldType)

   ;; you'll need to provide this in order to use BINNING
   (s/optional-key :fingerprint)
   (s/maybe su/Map)

   ;; any other keys are allowed and left unvalidated
   s/Any
   s/Any})
;; `card__` followed by a positive integer with no leading zero, anchored at both ends.
(def ^{:tag java.util.regex.Pattern} source-table-card-id-regex
  "Pattern that matches `card__id` strings that can be used as the `:source-table` of MBQL queries."
  #"^card__[1-9]\d*$")
(def SourceTable
  "Schema for a valid value for the `:source-table` clause of an MBQL query."
  ;; Either an integer Table ID or a `card__<id>` string.
  (s/cond-pre
   su/IntGreaterThanZero
   source-table-card-id-regex))
(def JoinField
  "Schema for any valid `Field` that is, or wraps, a `:joined-field` clause."
  (-> Field
      (s/constrained
       ;; the constraint passes when `match/match` returns a non-empty result for the `:joined-field` pattern
       (fn [field-clause]
         (seq (match/match field-clause [:joined-field true])))
       "`:joined-field` clause or Field clause wrapping a `:joined-field` clause")))
(def JoinFields
  "Schema for valid values of a join `:fields` clause."
  ;; Built inside-out: a sequence of `JoinField`s, which must be non-empty, whose elements must be distinct.
  (-> [JoinField]
      su/non-empty
      su/distinct
      (s/named
       "Distinct, non-empty sequence of `:joined-field` clauses or Field clauses wrapping `:joined-field` clauses")))
(def JoinStrategy
  "Strategy that should be used to perform the equivalent of a SQL `JOIN` against another table or a nested query.
  These correspond 1:1 to features of the same name in driver features lists; e.g. you should check that the current
  driver supports `:full-join` before generating a Join clause using that strategy."
  (apply s/enum [:left-join :right-join :inner-join :full-join]))
(def Join
  "Perform the equivalent of a SQL `JOIN` with another Table or nested `:source-query`. JOINs are either explicitly
  specified in the incoming query, or implicitly generated when one uses a `:fk->` clause.

  In the top-level query, you can reference Fields from the joined table or nested query by the `:fk->` clause for
  implicit joins; for explicit joins, you *must* specify `:alias` yourself; you can then reference Fields by using a
  `:joined-field` clause, e.g.

    [:joined-field \"my_join_alias\" [:field-id 1]]                                ; for joins against other Tables
    [:joined-field \"my_join_alias\" [:field-literal \"my_field\" :field/Integer]] ; for joins against nested queries

  The map must contain exactly one of `:source-table` or `:source-query`; this is enforced by the constraint at the
  end of the schema."
  (->
   {;; *What* to JOIN. Self-joins can be done by using the same `:source-table` as in the query where this is
    ;; specified. YOU MUST SUPPLY EITHER `:source-table` OR `:source-query`, BUT NOT BOTH!
    (s/optional-key :source-table)
    SourceTable

    (s/optional-key :source-query)
    SourceQuery
    ;;
    ;; The condition on which to JOIN. Can be anything that is a valid `:filter` clause. For automatically-generated
    ;; JOINs this is always
    ;;
    ;;    [:= <source-table-fk-field> [:joined-field <join-table-alias> <dest-table-pk-field>]]
    ;;
    :condition
    Filter
    ;;
    ;; Defaults to `:left-join`; used for all automatically-generated JOINs
    ;;
    ;; Driver implementations: this is guaranteed to be present after pre-processing.
    (s/optional-key :strategy)
    JoinStrategy
    ;;
    ;; The Fields to include in the results *if* a top-level `:fields` clause *is not* specified. This can be either
    ;; `:none`, `:all`, or a sequence of Field clauses.
    ;;
    ;; * `:none`: no Fields from the joined table or nested query are included (unless indirectly included by
    ;;    breakouts or other clauses). This is the default, and what is used for automatically-generated joins.
    ;;
    ;; * `:all`: will include all of the Fields from the joined table or query
    ;;
    ;; * a sequence of Field clauses: include only the Fields specified. Valid clauses are the same as the top-level
    ;;   `:fields` clause. This should be non-empty and all elements should be distinct. The normalizer will
    ;;   automatically remove duplicate fields for you, and replace empty clauses with `:none`.
    ;;
    ;; Driver implementations: you can ignore this clause. Relevant fields will be added to top-level `:fields`
    ;; clause with appropriate aliases.
    (s/optional-key :fields)
    (s/named
     (s/cond-pre
      (s/enum :all :none)
      JoinFields)
     (str
      "Valid Join `:fields`: `:all`, `:none`, or a sequence of `:joined-field` clauses,"
      " or clauses wrapping `:joined-field`."))
    ;;
    ;; The name used to alias the joined table or query. This is usually generated automatically and generally looks
    ;; like `table__via__field`. You can specify this yourself if you need to reference a joined field in a
    ;; `:joined-field` clause.
    ;;
    ;; Driver implementations: This is guaranteed to be present after pre-processing.
    (s/optional-key :alias)
    su/NonBlankString
    ;;
    ;; Used internally, only for annotation purposes in post-processing. When a join is implicitly generated via an
    ;; `:fk->` clause, the ID of the foreign key field in the source Table will be recorded here. This information is
    ;; used to add `fk_field_id` information to the `:cols` in the query results; I believe this is used to
    ;; facilitate drill-thru? :shrug:
    ;;
    ;; Don't set this information yourself. It will have no effect.
    (s/optional-key :fk-field-id)
    (s/maybe su/IntGreaterThanZero)
    ;;
    ;; Metadata about the source query being used, if pulled in from a Card via the `:source-table "card__id"`
    ;; syntax. Added automatically by the `resolve-card-id-source-tables` middleware.
    (s/optional-key :source-metadata)
    (s/maybe [SourceQueryMetadata])}
   ;; exactly one of `:source-table` / `:source-query`: at least one must be truthy, and not both at once
   (s/constrained
    (every-pred
     (some-fn :source-table :source-query)
     (complement (every-pred :source-table :source-query)))
    "Joins must have either a `source-table` or `source-query`, but not both.")))
(def Joins
  "Schema for the value of a `:joins` clause: a non-empty sequence of `Join` maps. Joins without an `:alias` are
  ignored by the uniqueness check; every alias that is present must be different from the others."
  (-> [Join]
      su/non-empty
      (s/constrained
       (fn [joins]
         ;; `keep` drops the nil results produced by joins that have no `:alias`
         (su/empty-or-distinct? (keep :alias joins)))
       "All join aliases must be unique.")))
(def Fields
  "Schema for the MBQL `:fields` clause: a non-empty sequence of `Field` clauses with no duplicates."
  (-> [Field]
      su/non-empty
      su/distinct
      (s/named "Distinct, non-empty sequence of Field clauses")))
(def MBQLQuery
  "Schema for a valid, normalized MBQL [inner] query.

  On top of the per-key schemas, two constraints apply to the map as a whole: exactly one of the keys
  `:source-query` / `:source-table` must be present, and no Field may appear in both `:breakout` and `:fields`."
  (->
   {(s/optional-key :source-query)    SourceQuery
    (s/optional-key :source-table)    SourceTable
    (s/optional-key :aggregation)     (su/non-empty [Aggregation])
    (s/optional-key :breakout)        (su/non-empty [Field])
    ;; TODO - expressions keys should be strings; fix this when we get a chance
    (s/optional-key :expressions)     {s/Keyword FieldOrExpressionDef}
    (s/optional-key :fields)          Fields
    (s/optional-key :filter)          Filter
    (s/optional-key :limit)           su/IntGreaterThanZero
    (s/optional-key :order-by)        (su/distinct (su/non-empty [OrderBy]))
    ;; page = page num, starting with 1. items = number of items per page.
    ;; e.g.
    ;; {:page 1, :items 10} = items 1-10
    ;; {:page 2, :items 10} = items 11-20
    (s/optional-key :page)            {:page  su/IntGreaterThanZero
                                       :items su/IntGreaterThanZero}
    (s/optional-key :joins)           Joins
    ;;
    ;; Info about the columns of the source query. Added in automatically by middleware. This metadata is primarily
    ;; used to power things like binning when used with Field Literals instead of normal Fields
    (s/optional-key :source-metadata) (s/maybe [SourceQueryMetadata])
    ;;
    ;; Other keys are added by middleware or the frontend client for various purposes. Various bits of middleware
    ;; add additional keys, such as `fields-is-implicit?`, to record bits of state or pass info to other pieces of
    ;; middleware. Everyone else can ignore them.
    s/Keyword                         s/Any}
   ;; counts the source keys that are *present* in the map (regardless of their values); must be exactly one
   (s/constrained
    (fn [inner-query]
      (->> [:source-query :source-table]
           (select-keys inner-query)
           core/count
           (core/= 1)))
    "Query must specify either `:source-table` or `:source-query`, but not both.")
   ;; `:breakout` and `:fields` must not have any clause in common
   (s/constrained
    (fn [{breakouts :breakout, selected :fields}]
      (let [overlap (set/intersection (set breakouts) (set selected))]
        (empty? overlap)))
    "Fields specified in `:breakout` should not be specified in `:fields`; this is implied.")))
;;; ----------------------------------------------------- Params -----------------------------------------------------
(def ^{:private true} Parameter
  "Schema for a single valid, normalized query parameter. Currently accepts any value."
  ;; placeholder: stays `s/Any` until we move over the stuff from the parameters middleware
  s/Any)
;;; ---------------------------------------------------- Options -----------------------------------------------------
(def ^{:private true} Settings
  "Schema for the `:settings` map: options that tweak the behavior of the query processor."
  {;; The timezone the query should be run in, overriding the default report timezone for the instance.
   (s/optional-key :report-timezone)
   su/NonBlankString
   ;; Any other keyword key is accepted with any value. Other Settings might be used somewhere, but they are not
   ;; known here; add them if you come across them, for documentation purposes.
   s/Keyword
   s/Any})
(def ^:private Constraints
  "Additional constraints added to a query limiting the maximum number of rows that can be returned. Mostly useful
  because native queries don't support the MBQL `:limit` clause. For MBQL queries, if `:limit` is set, it will
  override these values.

  When both `:max-results` and `:max-results-bare-rows` are set, `:max-results-bare-rows` must be less than or equal
  to `:max-results`; if either one is missing the pair is not compared."
  (s/constrained
   {;; maximum number of results to allow for a query with aggregations. If `max-results-bare-rows` is unset, this
    ;; applies to all queries
    (s/optional-key :max-results)           su/IntGreaterThanOrEqualToZero
    ;; maximum number of results to allow for a query with no aggregations.
    ;; If set, this should be LESS THAN OR EQUAL TO `:max-results`
    (s/optional-key :max-results-bare-rows) su/IntGreaterThanOrEqualToZero
    ;; other Constraints might be used somewhere, but I don't know about them. Add them if you come across them for
    ;; documentation purposes
    s/Keyword                               s/Any}
   (fn [{:keys [max-results max-results-bare-rows]}]
     ;; nothing to compare unless both limits are present
     (core/or (core/not (core/and max-results max-results-bare-rows))
              (core/>= max-results max-results-bare-rows)))
   "max-results-bare-rows must be less than or equal to max-results"))
(def ^{:private true} MiddlewareOptions
  "Schema for the `:middleware` map: additional options that can be used to toggle middleware on or off."
  {;; Should we skip adding results_metadata to query results after running the query? Used by
   ;; `metabase.query-processor.middleware.results-metadata`; default `false`
   (s/optional-key :skip-results-metadata?)            s/Bool

   ;; Should we skip converting datetime types to ISO-8601 strings with appropriate timezone when post-processing
   ;; results? Used by `metabase.query-processor.middleware.format-rows`; default `false`
   ;;
   ;; NOTE(review): the key name reads like an enable flag while the description above is phrased as a skip flag --
   ;; confirm the polarity and default against the `format-rows` middleware.
   (s/optional-key :format-rows?)                      s/Bool

   ;; Disable the MBQL->native middleware. If you do this, the query will not work at all, so there are no cases
   ;; where you should set this yourself. This is only used by the `qp/query->preprocessed` function to get the
   ;; fully pre-processed query without attempting to convert it to native.
   (s/optional-key :disable-mbql->native?)             s/Bool

   ;; Userland queries are ones run as a result of an API call, Pulse, MetaBot query, or the like. Special handling
   ;; is done in the `process-userland-query` middleware for such queries -- results are returned in a slightly
   ;; different format, and QueryExecution entries are normally saved, unless you pass `:no-save` as the option.
   (s/optional-key :userland-query?)                   (s/maybe s/Bool)

   ;; Whether to add some default `max-results` and `max-results-bare-rows` constraints. By default, none are
   ;; added, although the functions that ultimately power most API endpoints tend to set this to `true`. See
   ;; `add-constraints` middleware for more details.
   (s/optional-key :add-default-userland-constraints?) (s/maybe s/Bool)

   ;; Other middleware options might be used somewhere, but I don't know about them. Add them if you come across
   ;; them for documentation purposes
   s/Keyword                                           s/Any})
;;; ------------------------------------------------------ Info ------------------------------------------------------
;; This stuff is used for informational purposes, primarily to record QueryExecution entries when a query is run. Pass
;; them along if applicable when writing code that creates queries, but when working on middleware and the like you
;; can most likely ignore this stuff entirely.
(def Context
  "Schema for `info.context`: one of a fixed set of keywords, used for informational purposes to record how a query
  was executed."
  (apply s/enum
         [:ad-hoc
          :csv-download
          :dashboard
          :embedded-dashboard
          :embedded-question
          :json-download
          :map-tiles
          :metabot
          :public-dashboard
          :public-question
          :pulse
          :question
          :xlsx-download]))
;; TODO - this schema is somewhat misleading because if you use a function like
;; `qp/process-query-and-save-with-max-results-constraints!` some of these keys (e.g. `:context`) are in fact required
(def Info
  "Schema for the query `:info` dictionary, which is used for informational purposes to record information about how
  a query was executed in QueryExecution and other places. It is considered bad form for middleware to change its
  behavior based on this information, don't do it!

  Every key is optional and every value may be nil."
  (let [optional-id (s/maybe su/IntGreaterThanZero)]
    {;; These keys are nice to pass in if you're running queries on the backend and you know these values. They
     ;; aren't used for permissions checking or anything like that so don't try to be sneaky
     (s/optional-key :context)      (s/maybe Context)
     (s/optional-key :executed-by)  optional-id
     (s/optional-key :card-id)      optional-id
     (s/optional-key :dashboard-id) optional-id
     (s/optional-key :pulse-id)     optional-id
     (s/optional-key :nested?)      (s/maybe s/Bool)
     ;; `:query-hash` gets added automatically by `process-query-and-save-execution!`, so don't try passing it in
     ;; yourself. In fact, I would like this a lot better if we could take these keys out of `:info` entirely and
     ;; have the code that saves QueryExecutions figure out their values when it goes to save them.
     ;;
     ;; `(Class/forName "[B")` is the JVM class of a primitive byte array.
     (s/optional-key :query-hash)   (s/maybe (Class/forName "[B"))}))
;;; --------------------------------------------- Metabase [Outer] Query ---------------------------------------------
(def ^{:tag Integer} saved-questions-virtual-database-id
  "The ID used to signify that a database is 'virtual' rather than physical.

  A fake integer ID is used so as to minimize the number of changes that need to be made on the frontend -- by using
  something that would otherwise be a legal ID, *nothing* need change there, and the frontend can query against this
  'database' none the wiser. (This integer ID is negative which means it will never conflict with a *real* database
  ID.)

  This ID acts as a sort of flag. The relevant places in the middleware can check whether the DB we're querying is
  this 'virtual' database and take the appropriate actions."
  ;; NOTE(review): the reader produces a Long for this literal, so the `Integer` tag is only a hint -- confirm no
  ;; interop call site relies on it.
  -1337)
;; To the reader: yes, this seems sort of hacky, but one of the goals of the Nested Query Initiative™ was to minimize
;; if not completely eliminate any changes to the frontend. After experimenting with several possible ways to do
;; this, this implementation seemed simplest and best met the goal. Luckily this is the only place this "magic
;; number" is defined and the entire frontend can remain blissfully unaware of its value.
(def DatabaseID
  "Schema for a valid `:database` ID in the top-level 'outer' query. Accepts either

  * the saved questions virtual ID, a placeholder used for queries using the `:source-table \"card__id\"` shorthand
    for a source query resolved by middleware (since clients might not know the actual DB for that source query), or

  * a positive integer, referring to an actual Database."
  (let [virtual-database-id (s/eq saved-questions-virtual-database-id)
        actual-database-id  su/IntGreaterThanZero]
    (s/cond-pre virtual-database-id actual-database-id)))
(def Query
  "Schema for an [outer] query, e.g. the sort of thing you'd pass to the query processor or save in
  `Card.dataset_query`.

  Besides the per-key schemas, the map as a whole must satisfy three constraints: exactly one of `:native` /
  `:query` is set, the one that is set agrees with `:type`, and there is no top-level `:source-metadata`."
  (->
   {:database                     DatabaseID
    ;; Type of query. `:query` = MBQL; `:native` = native. TODO - consider normalizing `:query` to `:mbql`
    :type                         (s/enum :query :native)
    (s/optional-key :native)      NativeQuery
    (s/optional-key :query)       MBQLQuery
    (s/optional-key :parameters)  [Parameter]
    ;;
    ;; OPTIONS
    ;;
    ;; These keys are used to tweak behavior of the Query Processor.
    ;; TODO - can we combine these all into a single `:options` map?
    ;;
    (s/optional-key :settings)    (s/maybe Settings)
    (s/optional-key :constraints) (s/maybe Constraints)
    (s/optional-key :middleware)  (s/maybe MiddlewareOptions)
    ;;
    ;; INFO
    ;;
    ;; Used when recording info about this run in the QueryExecution log; things like the context the query was run
    ;; in and the User who ran it
    (s/optional-key :info)        (s/maybe Info)
    ;;
    ;; Other various keys get stuck in the query dictionary at some point or another by various pieces of QP
    ;; middleware to record bits of state. Everyone else can ignore them.
    s/Keyword                     s/Any}
   ;;
   ;; CONSTRAINTS
   ;;
   ;; Make sure we have the combo of query `:type` and `:native`/`:query`
   ;;
   ;; exactly one of `:native` / `:query` may be truthy
   (s/constrained
    (fn [outer-query]
      (core/not= (boolean (:native outer-query))
                 (boolean (:query outer-query))))
    "Query must specify either `:native` or `:query`, but not both.")
   ;; the key selected by `:type` must hold a truthy value
   (s/constrained
    (fn [outer-query]
      (case (:type outer-query)
        :native (:native outer-query)
        :query  (:query outer-query)))
    "Native queries must specify `:native`; MBQL queries must specify `:query`.")
   ;;
   ;; `:source-metadata` is added to queries when `card__id` source queries are resolved. It contains info about the
   ;; columns in the source query.
   ;;
   ;; Where this is added was changed in Metabase 0.33.0 -- previously, when `card__id` source queries were
   ;; resolved, the middleware would add `:source-metadata` to the top-level; to support joins against source
   ;; queries, this has been changed so it is always added at the same level the resolved `:source-query` is added.
   ;;
   ;; This should automatically be fixed by `normalize`; if we encounter it, it means some middleware is not
   ;; functioning properly
   (s/constrained
    (fn [outer-query]
      (core/not (:source-metadata outer-query)))
    "`:source-metadata` should be added in the same level as `:source-query` (i.e., the 'inner' MBQL query.)")))
;;; --------------------------------------------------- Validators ---------------------------------------------------
(def ^{:arglists (quote ([query]))} validate-query
  "Compiled schema validator for an [outer] Metabase query, built once from `Query` with `s/validator`.

  Pre-compiling a validator is more efficient, so use this instead of calling `(s/validate Query query)` or
  similar."
  (s/validator Query))