From d8ba31d0e4e7c450537e61c8aaf5c36c84d33390 Mon Sep 17 00:00:00 2001 From: Ryan Senior Date: Wed, 11 Apr 2018 11:35:59 -0500 Subject: [PATCH] Switch analyze steps to database focused rather than table Analyze will now work at the database level of granularity, rather than the table level of granularity. This will allow logging of summary information, such as when the step started and finished along with its duration. This matches how sync is currently structured. --- src/metabase/sync/analyze.clj | 35 +++++++++++++++++------ src/metabase/sync/analyze/classify.clj | 18 ++++++++++++ src/metabase/sync/analyze/fingerprint.clj | 9 ++++++ src/metabase/sync/util.clj | 15 ++++++---- 4 files changed, 62 insertions(+), 15 deletions(-) diff --git a/src/metabase/sync/analyze.clj b/src/metabase/sync/analyze.clj index e9d9e81cf6e00..9976fa930ea51 100644 --- a/src/metabase/sync/analyze.clj +++ b/src/metabase/sync/analyze.clj @@ -54,16 +54,26 @@ ;; newly re-fingerprinted Fields, because we'll know to skip the ones from last time since their value of ;; `last_analyzed` is not `nil`. +(s/defn ^:private update-last-analyzed! + [tables :- [i/TableInstance]] + (when-let [ids (seq (map u/get-id tables))] + ;; The WHERE portion of this query should match up with that of `classify/fields-to-classify` + (db/update-where! Field {:table_id [:in ids] + :fingerprint_version i/latest-fingerprint-version + :last_analyzed nil} + :last_analyzed (u/new-sql-timestamp)))) (s/defn ^:private update-fields-last-analyzed! "Update the `last_analyzed` date for all the recently re-fingerprinted/re-classified Fields in TABLE." [table :- i/TableInstance] - ;; The WHERE portion of this query should match up with that of `classify/fields-to-classify` - (db/update-where! Field {:table_id (u/get-id table) - :fingerprint_version i/latest-fingerprint-version - :last_analyzed nil} - :last_analyzed (u/new-sql-timestamp))) + (update-last-analyzed! [table])) +(s/defn ^:private update-fields-last-analyzed-for-db! 
+ "Update the `last_analyzed` date for all the recently re-fingerprinted/re-classified Fields in TABLES." + [database :- i/DatabaseInstance + tables :- [i/TableInstance]] + ;; The query (and its WHERE clause) lives in `update-last-analyzed!`; DATABASE is currently unused here + (update-last-analyzed! tables)) (s/defn analyze-table! "Perform in-depth analysis for a TABLE." @@ -75,6 +85,11 @@ (classify/classify-table! table) (update-fields-last-analyzed! table)) +(defn- maybe-log-progress [progress-bar-fn] + (fn [step table] + (let [progress-bar-result (progress-bar-fn)] + (when progress-bar-result + (log/info (u/format-color 'blue "%s Analyzed %s %s" step progress-bar-result (sync-util/name-for-logging table))))))) (s/defn analyze-db! "Perform in-depth analysis on the data for all Tables in a given DATABASE. @@ -83,7 +98,9 @@ [database :- i/DatabaseInstance] (sync-util/sync-operation :analyze database (format "Analyze data for %s" (sync-util/name-for-logging database)) (let [tables (sync-util/db->sync-tables database)] - (sync-util/with-emoji-progress-bar [emoji-progress-bar (count tables)] - (doseq [table tables] - (analyze-table! table) - (log/info (u/format-color 'blue "%s Analyzed %s" (emoji-progress-bar) (sync-util/name-for-logging table)))))))) + (sync-util/with-emoji-progress-bar [emoji-progress-bar (inc (* 3 (count tables)))] + (let [log-progress-fn (maybe-log-progress emoji-progress-bar)] + (fingerprint/fingerprint-fields-for-db! database tables log-progress-fn) + (classify/classify-fields-for-db! database tables log-progress-fn) + (classify/classify-tables-for-db! database tables log-progress-fn) + (update-fields-last-analyzed-for-db! database tables)))))) diff --git a/src/metabase/sync/analyze/classify.clj b/src/metabase/sync/analyze/classify.clj index 2c9ca3bb5f67b..0067e6cc82da0 100644 --- a/src/metabase/sync/analyze/classify.clj +++ b/src/metabase/sync/analyze/classify.clj @@ -124,3 +124,21 @@ setting) entitiy type of TABLE." 
[table :- i/TableInstance] (save-model-updates! table (name/infer-entity-type table))) + +(s/defn classify-tables-for-db! + "Classify all tables found in a given database" + [database :- i/DatabaseInstance + tables :- [i/TableInstance] + log-progress-fn] + (doseq [table tables] + (classify-table! table) + (log-progress-fn "classify-tables" table))) + +(s/defn classify-fields-for-db! + "Classify all fields found in a given database" + [database :- i/DatabaseInstance + tables :- [i/TableInstance] + log-progress-fn] + (doseq [table tables] + (classify-fields! table) + (log-progress-fn "classify-fields" table))) diff --git a/src/metabase/sync/analyze/fingerprint.clj b/src/metabase/sync/analyze/fingerprint.clj index f8e5ee11c2066..8349cfed779d2 100644 --- a/src/metabase/sync/analyze/fingerprint.clj +++ b/src/metabase/sync/analyze/fingerprint.clj @@ -156,3 +156,12 @@ [table :- i/TableInstance] (when-let [fields (fields-to-fingerprint table)] (fingerprint-table! table fields))) + +(s/defn fingerprint-fields-for-db! + "Invokes `fingerprint-fields!` on every table in `database`" + [database :- i/DatabaseInstance + tables :- [i/TableInstance] + log-progress-fn] + (doseq [table tables] + (fingerprint-fields! table) + (log-progress-fn "fingerprint-fields" table))) diff --git a/src/metabase/sync/util.clj b/src/metabase/sync/util.clj index 4113937bc3aa0..1c374603b37d1 100644 --- a/src/metabase/sync/util.clj +++ b/src/metabase/sync/util.clj @@ -188,14 +188,16 @@ (emoji-progress-bar 10 40) -> \"[************······································] 😒 25%" - [completed total] + [completed total log-every-n] (let [percent-done (float (/ completed total)) filleds (int (* percent-done emoji-meter-width)) blanks (- emoji-meter-width filleds)] - (str "[" - (str/join (repeat filleds "*")) - (str/join (repeat blanks "·")) - (format "] %s %3.0f%%" (u/emoji (percent-done->emoji percent-done)) (* percent-done 100.0))))) + (when (or (zero? 
(mod completed log-every-n)) + (= completed total)) + (str "[" + (str/join (repeat filleds "*")) + (str/join (repeat blanks "·")) + (format "] %s %3.0f%%" (u/emoji (percent-done->emoji percent-done)) (* percent-done 100.0)))))) (defmacro with-emoji-progress-bar "Run BODY with access to a function that makes using our amazing emoji-progress-bar easy like Sunday morning. @@ -209,7 +211,8 @@ [[emoji-progress-fn-binding total-count] & body] `(let [finished-count# (atom 0) total-count# ~total-count - ~emoji-progress-fn-binding (fn [] (emoji-progress-bar (swap! finished-count# inc) total-count#))] + log-every-n# (Math/ceil (/ total-count# 10)) + ~emoji-progress-fn-binding (fn [] (emoji-progress-bar (swap! finished-count# inc) total-count# log-every-n#))] ~@body))