From 42f053d89cafea5798c39ef0ed0e93bf3eaa414b Mon Sep 17 00:00:00 2001 From: Tienson Qin Date: Wed, 10 Feb 2021 19:43:18 +0800 Subject: [PATCH] feat(full-text-search): use flexsearch instead of fuzzysort For full-text search. --- externs.js | 2 + package.json | 1 + src/main/frontend/components/search.cljs | 37 +++++++++- src/main/frontend/handler/editor.cljs | 2 +- src/main/frontend/handler/search.cljs | 13 ++-- src/main/frontend/search.cljs | 91 +++++++++++++----------- yarn.lock | 5 ++ 7 files changed, 104 insertions(+), 47 deletions(-) diff --git a/externs.js b/externs.js index 82400084a69..a228984ff86 100644 --- a/externs.js +++ b/externs.js @@ -17,6 +17,8 @@ dummy.getRangeAt = function() {}; dummy.getElementsByClassName = function() {}; dummy.containsNode = function() {}; dummy.select = function() {}; +dummy.search = function() {}; +dummy.add = function() {}; dummy.closest = function () {}; dummy.setAttribute = function() {}; dummy.getAttribute = function() {}; diff --git a/package.json b/package.json index 1ef8a3dc3cc..3b4707f0d5f 100644 --- a/package.json +++ b/package.json @@ -61,6 +61,7 @@ "diff": "5.0.0", "diff-match-patch": "^1.0.5", "electron": "^11.2.0", + "flexsearch": "^0.6.32", "fs": "^0.0.1-security", "fuzzysort": "git+https://github.com/getstation/fuzzysort#a66f5813825d2415b606cc69129070c4eb612ae2", "gulp-cached": "^1.1.1", diff --git a/src/main/frontend/components/search.cljs b/src/main/frontend/components/search.cljs index 305d7f68924..b8738a43fea 100644 --- a/src/main/frontend/components/search.cljs +++ b/src/main/frontend/components/search.cljs @@ -31,6 +31,41 @@ (let [switch (reductions not= true (map pred? coll (rest coll)))] (map (partial map first) (partition-by second (map list coll switch))))) +(defn highlight-exact-query + [content q] + (let [q-words (string/split q #" ") + lc-content (string/lower-case content) + lc-q (string/lower-case q)] + (if (or (string/includes? lc-content lc-q) + (not (re-find #" " q))) + (let [i (string/index-of lc-content lc-q) + [before after] [(subs content 0 i) (subs content (+ i (count q)))]] + [:p + (when-not (string/blank? before) + [:span before]) + [:mark (subs content i (+ i (count q)))] + (when-not (string/blank? after) + [:span after])]) + (let [elements (loop [words q-words + content content + result []] + (if (and (seq words) content) + (let [word (first words) + lc-word (string/lower-case word) + lc-content (string/lower-case content)] + (if-let [i (string/index-of lc-content lc-word)] + (recur (rest words) + (subs content (+ i (count word))) + (vec + (concat result + [[:span (subs content 0 i)] + [:mark (subs content i (+ i (count word)))]]))) + (recur nil + content + result))) + (conj result [:span content])))] + [:p elements])))) + (rum/defc highlight-fuzzy [content indexes] (let [n (count content) @@ -182,7 +217,7 @@ (:page/name page))] [:div.flex-1 [:div.text-sm.font-medium (str "-> " page)] - (highlight-fuzzy content indexes)]) + (highlight-exact-query content search-q)]) nil))})]))) diff --git a/src/main/frontend/handler/editor.cljs b/src/main/frontend/handler/editor.cljs index 518456021ae..85ad68fde62 100644 --- a/src/main/frontend/handler/editor.cljs +++ b/src/main/frontend/handler/editor.cljs @@ -1767,7 +1767,7 @@ (remove (fn [h] (contains? current-and-parents (:block/uuid h))) - (search/search q 10)))) + (search/block-search q 10)))) (defn get-matched-templates [q] diff --git a/src/main/frontend/handler/search.cljs b/src/main/frontend/handler/search.cljs index e7bd0b31f8f..09e9430ac6c 100644 --- a/src/main/frontend/handler/search.cljs +++ b/src/main/frontend/handler/search.cljs @@ -3,14 +3,17 @@ [frontend.state :as state] [goog.dom :as gdom] [frontend.search :as search] - [frontend.handler.notification :as notification-handler])) + [frontend.handler.notification :as notification-handler] + [promesa.core :as p])) (defn search [q] - (swap! state/state assoc :search/result - {:pages (search/page-search q) - :files (search/file-search q) - :blocks (search/search q 10)})) + ;; TODO: separate rendering for blocks + (p/let [blocks-result (search/block-search q 10)] + (swap! state/state assoc :search/result + {:pages (search/page-search q) + :files (search/file-search q) + :blocks blocks-result}))) (defn clear-search! [] diff --git a/src/main/frontend/search.cljs b/src/main/frontend/search.cljs index 330982bd2a6..234d1088845 100644 --- a/src/main/frontend/search.cljs +++ b/src/main/frontend/search.cljs @@ -12,25 +12,30 @@ [cljs-bean.core :as bean] [goog.object :as gobj] ["fuzzysort" :as fuzzy] - [medley.core :as medley])) + ["flexsearch" :as flexsearch] + [medley.core :as medley] + [promesa.core :as p])) (def fuzzy-go (gobj/get fuzzy "go")) (defonce prepare (gobj/get fuzzy "prepare")) (defonce highlight (gobj/get fuzzy "highlight")) (defn go - [q indice opts] - (fuzzy-go q indice opts)) + [q indice-type indice opts] + (case indice-type + :page + (fuzzy-go q indice opts) + + :block + (.search indice q opts))) (defn block->index [{:block/keys [uuid content format] :as block}] - (when (<= (count content) 1000) ; performance - (when-let [result (->> (text/remove-level-spaces content format) - (text/remove-properties!) - (prepare))] - (gobj/set result "id" (:db/id block)) - (gobj/set result "uuid" (str uuid)) - result))) + (when-let [result (->> (text/remove-level-spaces content format) + (text/remove-properties!))] + {:id (:db/id block) + :uuid (str uuid) + :content result})) (defn make-blocks-indice! [] @@ -38,9 +43,17 @@ (let [blocks (->> (db/get-all-block-contents) (map block->index) (remove nil?) - (bean/->js))] - (swap! indices assoc-in [repo :blocks] blocks) - blocks))) + (bean/->js)) + indice (flexsearch. + (clj->js + {:encode "icase" + :tokenize "full" + :doc {:id "id" + :field ["uuid" "content"]} + :async true}))] + (p/let [result (.add indice blocks)] + (swap! indices assoc-in [repo :blocks] indice)) + indice))) (defn make-pages-indice! [] @@ -136,10 +149,9 @@ :score (score query (.toLowerCase s))}))))) (map :data)))) -(defn search - "Block search" +(defn block-search ([q] - (search q 10)) + (block-search q 10)) ([q limit] (when-let [repo (state/get-current-repo)] (when-not (string/blank? q) @@ -147,21 +159,17 @@ q (escape-str q)] (when-not (string/blank? q) (let [indice (or (get-in @indices [repo :blocks]) - (make-blocks-indice!)) - result (-> - (go q indice (clj->js {:limit limit - :allowTypo false - :threshold -10000})) - (bean/->clj))] - (->> - (map - (fn [{:keys [target uuid indexes]}] - {:block/uuid uuid - :block/content target - :block/page (:block/page (db/entity [:block/uuid (medley/uuid (str uuid))])) - :block/indexes indexes}) ; For result highlight - result) - (remove nil?))))))))) + (make-blocks-indice!))] + (p/let [result (go q :block indice (clj->js {:limit limit})) + result (bean/->clj result)] + (->> + (map + (fn [{:keys [content uuid] :as block}] + {:block/uuid uuid + :block/content content + :block/page (:block/page (db/entity [:block/uuid (medley/uuid (str uuid))]))}) + result) + (remove nil?)))))))))) (defn page-search ([q] @@ -173,11 +181,12 @@ (when-not (string/blank? q) (let [indice (or (get-in @indices [repo :pages]) (make-pages-indice!)) - result (->> (go q indice (clj->js {:limit limit - :key "name" - :allowTypo false - :threshold -10000})) + result (->> (go q :page indice (clj->js {:limit limit + :key "name" + :allowTypo false + :threshold -10000})) (bean/->clj))] + ;; TODO: add indexes for highlights (->> (map (fn [{:keys [obj]}] (:name obj)) @@ -250,8 +259,10 @@ (map :e) (set))] (swap! search-db/indices update-in [repo :blocks] - (fn [blocks] - (let [blocks (or blocks (array)) - blocks (.filter blocks (fn [block] - (not (contains? blocks-to-remove-set (gobj/get block "id")))))] - (.concat blocks (bean/->js blocks-to-add))))))))))) + (fn [indice] + (when indice + (doseq [block-id blocks-to-remove-set] + (.remove indice #js {:id block-id})) + (when (seq blocks-to-add) + (.add indice (bean/->js blocks-to-add)))) + indice)))))))) diff --git a/yarn.lock b/yarn.lock index b6ae5a884c5..0c0dbf91cc7 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2312,6 +2312,11 @@ flatted@^3.1.0: resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.1.0.tgz#a5d06b4a8b01e3a63771daa5cb7a1903e2e57067" integrity sha512-tW+UkmtNg/jv9CSofAKvgVcO7c2URjhTdW1ZTkcAritblu8tajiYy7YisnIflEwtKssCtOxpnBRoCB7iap0/TA== +flexsearch@^0.6.32: + version "0.6.32" + resolved "https://registry.yarnpkg.com/flexsearch/-/flexsearch-0.6.32.tgz#1e20684d317af65baa445cdd9864a5f5b320f510" + integrity sha512-EF1BWkhwoeLtbIlDbY/vDSLBen/E5l/f1Vg7iX5CDymQCamcx1vhlc3tIZxIDplPjgi0jhG37c67idFbjg+v+Q== + flush-write-stream@^1.0.2: version "1.1.1" resolved "https://registry.yarnpkg.com/flush-write-stream/-/flush-write-stream-1.1.1.tgz#8dd7d873a1babc207d94ead0c2e0e44276ebf2e8"