-
Notifications
You must be signed in to change notification settings - Fork 4.9k
/
describe_table.clj
400 lines (364 loc) · 19.5 KB
/
describe_table.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
(ns metabase.driver.sql-jdbc.sync.describe-table
"SQL JDBC impl for `describe-table`, `describe-table-fks`, and `describe-nested-field-columns`."
(:require [cheshire.core :as json]
[clojure.java.jdbc :as jdbc]
[clojure.set :as set]
[clojure.string :as str]
[clojure.tools.logging :as log]
[honeysql.core :as hsql]
[medley.core :as m]
[metabase.driver :as driver]
[metabase.driver.sql-jdbc.connection :as sql-jdbc.conn]
[metabase.driver.sql-jdbc.sync.common :as sql-jdbc.common]
[metabase.driver.sql-jdbc.sync.interface :as sql-jdbc.sync.interface]
[metabase.driver.sql.query-processor :as sql.qp]
[metabase.mbql.schema :as mbql.s]
[metabase.models.table :as table]
[metabase.util :as u]
[metabase.util.honeysql-extensions :as hx])
(:import [java.sql Connection DatabaseMetaData ResultSet]))
(defmethod sql-jdbc.sync.interface/column->semantic-type :sql-jdbc [_ _ _] nil)
(defn pattern-based-database-type->base-type
"Return a `database-type->base-type` function that matches types based on a sequence of pattern / base-type pairs.
`pattern->type` is a map of regex pattern to MBQL type keyword."
[pattern->type]
(fn database-type->base-type [column-type]
(let [column-type (name column-type)]
(some
(fn [[pattern base-type]]
(when (re-find pattern column-type)
base-type))
pattern->type))))
(defn get-catalogs
"Returns a set of all of the catalogs found via `metadata`"
[^DatabaseMetaData metadata]
(with-open [rs (.getCatalogs metadata)]
(set (map :table_cat (jdbc/metadata-result rs)))))
(defn- database-type->base-type-or-warn
"Given a `database-type` (e.g. `VARCHAR`) return the mapped Metabase type (e.g. `:type/Text`)."
[driver database-type]
(or (sql-jdbc.sync.interface/database-type->base-type driver (keyword database-type))
(do (log/warn (format "Don't know how to map column type '%s' to a Field base_type, falling back to :type/*."
database-type))
:type/*)))
(defn- calculated-semantic-type
"Get an appropriate semantic type for a column with `column-name` of type `database-type`."
[driver ^String column-name ^String database-type]
(when-let [semantic-type (sql-jdbc.sync.interface/column->semantic-type driver database-type column-name)]
(assert (isa? semantic-type :type/*)
(str "Invalid type: " semantic-type))
semantic-type))
(defmethod sql-jdbc.sync.interface/fallback-metadata-query :sql-jdbc
[driver schema table]
{:pre [(string? table)]}
;; Using our SQL compiler here to get portable LIMIT (e.g. `SELECT TOP n ...` for SQL Server/Oracle)
(let [honeysql {:select [:*]
:from [(sql.qp/->honeysql driver (hx/identifier :table schema table))]
:where [:not= 1 1]}
honeysql (sql.qp/apply-top-level-clause driver :limit honeysql {:limit 0})]
(sql.qp/format-honeysql driver honeysql)))
(defn fallback-fields-metadata-from-select-query
"In some rare cases `:column_name` is blank (eg. SQLite's views with group by) fallback to sniffing the type from a
SELECT * query."
[driver ^Connection conn table-schema table-name]
;; some DBs (:sqlite) don't actually return the correct metadata for LIMIT 0 queries
(let [[sql & params] (sql-jdbc.sync.interface/fallback-metadata-query driver table-schema table-name)]
(reify clojure.lang.IReduceInit
(reduce [_ rf init]
(with-open [stmt (sql-jdbc.common/prepare-statement driver conn sql params)
rs (.executeQuery stmt)]
(let [metadata (.getMetaData rs)]
(reduce
((map (fn [^Integer i]
{:name (.getColumnName metadata i)
:database-type (.getColumnTypeName metadata i)})) rf)
init
(range 1 (inc (.getColumnCount metadata))))))))))
(defn- jdbc-fields-metadata
"Reducible metadata about the Fields belonging to a Table, fetching using JDBC DatabaseMetaData methods."
[driver ^Connection conn db-name-or-nil schema table-name]
(sql-jdbc.common/reducible-results
#(.getColumns (.getMetaData conn)
db-name-or-nil
(some->> schema (driver/escape-entity-name-for-metadata driver))
(some->> table-name (driver/escape-entity-name-for-metadata driver))
nil)
(fn [^ResultSet rs]
;; https://docs.oracle.com/javase/7/docs/api/java/sql/DatabaseMetaData.html#getColumns(java.lang.String,%20java.lang.String,%20java.lang.String,%20java.lang.String)
#(let [default (.getString rs "COLUMN_DEF")
no-default? (contains? #{nil "NULL" "null"} default)
nullable (.getInt rs "NULLABLE")
not-nullable? (= 0 nullable)
auto-increment (.getString rs "IS_AUTOINCREMENT")
no-auto-increment? (= "NO" auto-increment)
column-name (.getString rs "COLUMN_NAME")
required? (and no-default? not-nullable? no-auto-increment?)]
(merge
{:name column-name
:database-type (.getString rs "TYPE_NAME")
:database-required required?}
(when-let [remarks (.getString rs "REMARKS")]
(when-not (str/blank? remarks)
{:field-comment remarks})))))))
(defn ^:private fields-metadata
[driver ^Connection conn {schema :schema, table-name :name} ^String db-name-or-nil]
{:pre [(instance? Connection conn) (string? table-name)]}
(reify clojure.lang.IReduceInit
(reduce [_ rf init]
;; 1. Return all the Fields that come back from DatabaseMetaData that include type info.
;;
;; 2. Iff there are some Fields that don't have type info, concatenate
;; `fallback-fields-metadata-from-select-query`, which fetches the same Fields using a different method.
;;
;; 3. Filter out any duplicates between the two methods using `m/distinct-by`.
(let [has-fields-without-type-info? (volatile! false)
jdbc-metadata (eduction
(remove (fn [{:keys [database-type]}]
(when (str/blank? database-type)
(vreset! has-fields-without-type-info? true)
true)))
(jdbc-fields-metadata driver conn db-name-or-nil schema table-name))
fallback-metadata (reify clojure.lang.IReduceInit
(reduce [_ rf init]
(reduce
rf
init
(when @has-fields-without-type-info?
(fallback-fields-metadata-from-select-query driver conn schema table-name)))))]
;; VERY IMPORTANT! DO NOT REWRITE THIS TO BE LAZY! IT ONLY WORKS BECAUSE AS NORMAL-FIELDS GETS REDUCED,
;; HAS-FIELDS-WITHOUT-TYPE-INFO? WILL GET SET TO TRUE IF APPLICABLE AND THEN FALLBACK-FIELDS WILL RUN WHEN
;; IT'S TIME TO START EVALUATING THAT.
(reduce
((comp cat (m/distinct-by :name)) rf)
init
[jdbc-metadata fallback-metadata])))))
(defn describe-table-fields-xf
"Returns a transducer for computing metatdata about the fields in `table`."
[driver table]
(map-indexed (fn [i {:keys [database-type], column-name :name, :as col}]
(let [semantic-type (calculated-semantic-type driver column-name database-type)]
(merge
(u/select-non-nil-keys col [:name :database-type :field-comment :database-required])
{:base-type (database-type->base-type-or-warn driver database-type)
:database-position i}
(when semantic-type
{:semantic-type semantic-type})
(when (and
(isa? semantic-type :type/SerializedJSON)
(driver/database-supports?
driver
:nested-field-columns
(table/database table)))
{:visibility-type :details-only}))))))
(defmulti describe-table-fields
"Returns a set of column metadata for `table` using JDBC Connection `conn`."
{:added "0.45.0"
:arglists '([driver ^Connection conn table ^String db-name-or-nil])}
driver/dispatch-on-initialized-driver
:hierarchy #'driver/hierarchy)
(defmethod describe-table-fields :sql-jdbc
[driver conn table db-name-or-nil]
(into
#{}
(describe-table-fields-xf driver table)
(fields-metadata driver conn table db-name-or-nil)))
(defn add-table-pks
"Using `metadata` find any primary keys for `table` and assoc `:pk?` to true for those columns."
[^DatabaseMetaData metadata table]
(let [pks (into #{} (sql-jdbc.common/reducible-results #(.getPrimaryKeys metadata nil nil (:name table))
(fn [^ResultSet rs]
#(.getString rs "COLUMN_NAME"))))]
(update table :fields (fn [fields]
(set (for [field fields]
(if-not (contains? pks (:name field))
field
(assoc field :pk? true))))))))
(defn- describe-table* [driver ^Connection conn table]
{:pre [(instance? Connection conn)]}
(->> (assoc (select-keys table [:name :schema])
:fields (describe-table-fields driver conn table nil))
;; find PKs and mark them
(add-table-pks (.getMetaData conn))))
(defn describe-table
"Default implementation of `driver/describe-table` for SQL JDBC drivers. Uses JDBC DatabaseMetaData."
[driver db-or-id-or-spec-or-conn table]
(if (instance? Connection db-or-id-or-spec-or-conn)
(describe-table* driver db-or-id-or-spec-or-conn table)
(let [spec (sql-jdbc.conn/db->pooled-connection-spec db-or-id-or-spec-or-conn)]
(with-open [conn (jdbc/get-connection spec)]
(describe-table* driver conn table)))))
(defn- describe-table-fks*
[_driver ^Connection conn {^String schema :schema, ^String table-name :name} & [^String db-name-or-nil]]
(into
#{}
(sql-jdbc.common/reducible-results #(.getImportedKeys (.getMetaData conn) db-name-or-nil schema table-name)
(fn [^ResultSet rs]
(fn []
{:fk-column-name (.getString rs "FKCOLUMN_NAME")
:dest-table {:name (.getString rs "PKTABLE_NAME")
:schema (.getString rs "PKTABLE_SCHEM")}
:dest-column-name (.getString rs "PKCOLUMN_NAME")})))))
(defn describe-table-fks
"Default implementation of `driver/describe-table-fks` for SQL JDBC drivers. Uses JDBC DatabaseMetaData."
[driver db-or-id-or-spec-or-conn table & [db-name-or-nil]]
(if (instance? Connection db-or-id-or-spec-or-conn)
(describe-table-fks* driver db-or-id-or-spec-or-conn table db-name-or-nil)
(let [spec (sql-jdbc.conn/db->pooled-connection-spec db-or-id-or-spec-or-conn)]
(with-open [conn (jdbc/get-connection spec)]
(describe-table-fks* driver conn table db-name-or-nil)))))
(def ^:const nested-field-sample-limit
"Number of rows to sample for describe-nested-field-columns"
500)
(def ^:dynamic *nested-field-column-max-row-length*
"Max string length for a row for nested field column before we just give up on parsing it.
Marked as mutable because we mutate it for tests."
50000)
(defn- flattened-row [field-name row]
(letfn [(flatten-row [row path]
(lazy-seq
(when-let [[[k v] & xs] (seq row)]
(cond (and (map? v) (not-empty v))
(into (flatten-row v (conj path k))
(flatten-row xs path))
:else
(cons [(conj path k) v]
(flatten-row xs path))))))]
(into {} (flatten-row row [field-name]))))
(defn- type-by-parsing-string
"Mostly just (type member) but with a bit to suss out strings which are ISO8601 and say that they are datetimes"
[member]
(let [member-type (type member)]
(if (and (instance? String member)
(mbql.s/can-parse-datetime? member))
java.time.LocalDateTime
member-type)))
(defn- row->types [row]
(into {} (for [[field-name field-val] row
;; We put top-level array row type semantics on JSON roadmap but skip for now
:when (map? field-val)]
(let [flat-row (flattened-row field-name field-val)]
(into {} (map (fn [[k v]] [k (type-by-parsing-string v)]) flat-row))))))
(defn- describe-json-xform [member]
((comp (map #(for [[k v] %
:when (< (count v) *nested-field-column-max-row-length*)]
[k (json/parse-string v)]))
(map #(into {} %))
(map row->types)) member))
(def ^:const max-nested-field-columns
"Maximum number of nested field columns."
100)
(defn- describe-json-rf
"Reducing function that takes a bunch of maps from row->types,
and gets them to conform to the type hierarchy,
going through and taking the lowest common denominator type at each pass,
ignoring the nils."
([] nil)
([acc-field-type-map] acc-field-type-map)
([acc-field-type-map second-field-type-map]
(into {}
(for [json-column (set/union (set (keys second-field-type-map))
(set (keys acc-field-type-map)))]
(cond
(or (nil? acc-field-type-map)
(nil? (acc-field-type-map json-column))
(= (hash (acc-field-type-map json-column))
(hash (second-field-type-map json-column))))
[json-column (second-field-type-map json-column)]
(or (nil? second-field-type-map)
(nil? (second-field-type-map json-column)))
[json-column (acc-field-type-map json-column)]
(every? #(isa? % Number) [(acc-field-type-map json-column)
(second-field-type-map json-column)])
[json-column java.lang.Number]
(every?
(fn [column-type]
(some (fn [allowed-type]
(isa? column-type allowed-type))
[String Number Boolean java.time.LocalDateTime]))
[(acc-field-type-map json-column) (second-field-type-map json-column)])
[json-column java.lang.String]
:else
[json-column nil])))))
(def field-type-map
"Map from Java types for deserialized JSON (so small subset of Java types) to MBQL types.
We actually do deserialize the JSON in order to determine types,
so the java / clojure types we get have to be matched to MBQL types"
{java.lang.String :type/Text
;; JSON itself has the single number type, but Java serde of JSON is stricter
java.lang.Long :type/Integer
clojure.lang.BigInt :type/BigInteger
java.math.BigInteger :type/BigInteger
java.lang.Integer :type/Integer
java.lang.Double :type/Float
java.lang.Float :type/Float
java.math.BigDecimal :type/Decimal
java.lang.Number :type/Number
java.lang.Boolean :type/Boolean
java.time.LocalDateTime :type/DateTime
clojure.lang.PersistentVector :type/Array
clojure.lang.PersistentArrayMap :type/Structured
clojure.lang.PersistentHashMap :type/Structured})
(def db-type-map
"Map from MBQL types to database types.
This is the lowest common denominator of types, hopefully,
although as of writing this is just geared towards Postgres types"
{:type/Text "text"
:type/Integer "bigint"
;; You might think that the ordinary 'bigint' type in Postgres and MySQL should be this.
;; However, Bigint in those DB's maxes out at 2 ^ 64.
;; JSON, like Javascript itself, will happily represent 1.8 * (10^308),
;; Losing digits merrily along the way.
;; We can't really trust anyone to use MAX_SAFE_INTEGER, in JSON-land..
;; So really without forcing arbitrary precision ('decimal' type),
;; we have too many numerical regimes to test.
;; (#22732) was basically the consequence of missing one.
:type/BigInteger "decimal"
:type/Float "double precision"
:type/Number "double precision"
:type/Decimal "decimal"
:type/Boolean "boolean"
:type/DateTime "timestamp"
:type/Array "text"
:type/Structured "text"})
(defn- field-types->fields [field-types]
(let [valid-fields (for [[field-path field-type] (seq field-types)]
(if (nil? field-type)
nil
(let [curr-type (get field-type-map field-type :type/*)]
{:name (str/join " \u2192 " (map name field-path)) ;; right arrow
:database-type (db-type-map curr-type)
:base-type curr-type
;; Postgres JSONB field, which gets most usage, doesn't maintain JSON object ordering...
:database-position 0
:visibility-type :normal
:nfc-path field-path})))
field-hash (apply hash-set (filter some? valid-fields))]
field-hash))
;; The name's nested field columns but what the people wanted (issue #708)
;; was JSON so what they're getting is JSON.
(defn describe-nested-field-columns
"Default implementation of `describe-nested-field-columns` for SQL JDBC drivers. Goes and queries the table if there are JSON columns for the nested contents."
[driver spec table]
(with-open [conn (jdbc/get-connection spec)]
(let [table-identifier-info [(:schema table) (:name table)]
table-fields (describe-table-fields driver conn table nil)
json-fields (filter #(= (:semantic-type %) :type/SerializedJSON) table-fields)]
(if (nil? (seq json-fields))
#{}
(let [json-field-names (mapv #(apply hx/identifier :field (into table-identifier-info [(:name %)])) json-fields)
table-identifier (apply hx/identifier :table table-identifier-info)
quote-type (case driver :postgres :ansi :mysql :mysql)
sql-args (hsql/format {:select json-field-names
:from [table-identifier]
:limit nested-field-sample-limit} :quoting quote-type)
query (jdbc/reducible-query spec sql-args {:identifiers identity})
field-types (transduce describe-json-xform describe-json-rf query)
fields (field-types->fields field-types)]
(if (> (count fields) max-nested-field-columns)
(do
(log/warn
(format
"More nested field columns detected than maximum. Limiting the number of nested field columns to %d."
max-nested-field-columns))
(set (take max-nested-field-columns fields)))
fields))))))