Skip to content

Commit

Permalink
Optimize lucene searches (#1420)
Browse files Browse the repository at this point in the history
improve lucene search
  • Loading branch information
ereteog committed Apr 8, 2024
1 parent 52e19a4 commit ac5291d
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 32 deletions.
9 changes: 6 additions & 3 deletions src/ctia/entity/feed.clj
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,8 @@
(-> (get-store :feed)
(query-string-search
{:search-query (routes.common/search-query {:date-field :created
:params params})
:params params}
services)
:ident identity-map
:params (select-keys params routes.common/search-options)})
(page-with-long-id services)
Expand All @@ -376,7 +377,8 @@
(ok (-> (get-store :feed)
(query-string-count
(routes.common/search-query {:date-field :created
:params params})
:params params}
services)
identity-map)))))

(let [capabilities #{:search-feed :delete-feed}]
Expand All @@ -389,7 +391,8 @@
:identity-map identity-map
:query [params FeedDeleteSearchParams]
(let [query (routes.common/search-query {:date-field :created
:params (dissoc params :wait_for :REALLY_DELETE_ALL_THESE_ENTITIES)})]
:params (dissoc params :wait_for :REALLY_DELETE_ALL_THESE_ENTITIES)}
services)]
(if (empty? query)
(forbidden {:error "you must provide at least one of from, to, query or any field filter."})
(ok
Expand Down
9 changes: 6 additions & 3 deletions src/ctia/entity/vulnerability.clj
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,12 @@

(defn search-by-ids
"Search `store` for vulnerabilities with id in seq `ids`."
[{:keys [identity-map ids limit params store]}]
[{:keys [identity-map ids limit params store]}
services]
(ctia-store/query-string-search store
{:search-query (assoc (routes.common/search-query {:date-field :timestamp
:params params})
:params params}
services)
:filter-map {:id (take limit ids)})
:ident identity-map
:params (assoc params :sort {:id :asc})}))
Expand Down Expand Up @@ -201,7 +203,8 @@
:store store})
:limit (or limit es-pagination/default-limit)
:params params
:store store})
:store store}
services)
(ent/page-with-long-id services)
routes.common/paginated-ok))))))

Expand Down
34 changes: 31 additions & 3 deletions src/ctia/http/routes/common.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
(:require [clj-http.headers :refer [canonicalize]]
[clj-momo.lib.clj-time.core :as t]
[clojure.string :as str]
[ctia.properties :refer [get-http-show]]
[ctia.schemas.core :refer [SortExtensionDefinitions]]
[ctia.schemas.search-agg :refer [MetricResult
RangeQueryOpt
Expand Down Expand Up @@ -156,6 +157,31 @@
{:gte from
:lt to-or-now}))

;; Regex fragment (as a string, meant to be embedded in a larger pattern)
;; for a UUID written as 8-4-4-4-12 hexadecimal digits separated by hyphens.
;; Only lowercase hex digits (0-9, a-f) are accepted.
(def uuid-pattern
"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")

;; Regex fragment (as a string, meant to be embedded in a larger pattern)
;; for an entity type name such as "casebook" or "asset-properties":
;; runs of word characters separated by single hyphens.
;;
;; `\w` already matches `_`, so the separator class must contain only `-`.
;; Listing `_` there as well makes the pattern ambiguous: a run such as
;; "a_a_a" can then be split in exponentially many ways, and a query that
;; ultimately fails to match sends the backtracking regex engine into
;; catastrophic backtracking. The set of strings matched is the same either
;; way, and the pattern still contributes exactly one capture group.
(def entity-type-pattern
  "\\w+([-]\\w+)*")

;; Compiled regex matching a query term of the form
;; `<field>:*<entity-type>-<uuid>`, i.e. a field search whose value is a
;; literal `*` followed by an entity type, a hyphen and a UUID.
;; Capture groups:
;;   1 - the field name including its trailing colon
;;   2 - the part after the `*` (`<entity-type>-<uuid>`)
;;   3 - the entity type alone
;; entity-type-pattern brings one further nested group (4) of its own.
(def wildcard-ctia-id-re
(re-pattern (format "(\\w+[:])[*]((%s)[-]%s)" entity-type-pattern uuid-pattern)))

(defn long-id-prefix
  "Builds the URL prefix placed in front of `<entity-type>/<short-id>` when
  rewriting id searches, of the form
  `<protocol>://<hostname>[:<port>]<path-prefix>/ctia/`.

  The parts are read from the map returned by `get-http-show` for
  `services`. The `:<port>` segment is omitted when `port` is nil, and a
  nil `path-prefix` contributes nothing to the result."
  [services]
  (let [{:keys [hostname path-prefix port protocol]} (get-http-show services)]
    (str protocol
         "://"
         hostname
         (when port (str ":" port))
         path-prefix
         "/ctia/")))

(defn prepare-lucene-id-search
  "Rewrites every `field:*<entity-type>-<uuid>` term found in `query` into a
  quoted term built from the long id prefix:
  `field:\"<long-id-prefix><entity-type>/<entity-type>-<uuid>\"`.
  Parts of `query` that do not match `wildcard-ctia-id-re` are left as is.

  The replacement is produced by a function rather than a `$n` template:
  `clojure.string/replace` inserts a function's result literally, so a `$`
  or backslash in the configured prefix cannot be misread as a group
  reference or an escape."
  [query services]
  (let [prefix (long-id-prefix services)]
    (str/replace query
                 wildcard-ctia-id-re
                 ;; match vector: whole match, field with colon, short id,
                 ;; entity type (any further groups are ignored)
                 (fn [[_ field short-id entity-type]]
                   (str field "\"" prefix entity-type "/" short-id "\"")))))

(s/defn search-query :- SearchQuery
([{:keys [date-field make-date-range-fn]
{:keys [query
Expand All @@ -169,15 +195,17 @@
(cond-> {}
from (assoc :gte from)
to (assoc :lt to)))}}
:- SearchQueryArgs]
:- SearchQueryArgs
services]
(let [filter-map (apply dissoc params filter-map-search-options)
date-range (make-date-range-fn from to)]
date-range (make-date-range-fn from to)
prepared-query (when query (prepare-lucene-id-search query services))]
(cond-> {}
(seq date-range) (assoc-in [:range date-field] date-range)
(seq filter-map) (assoc :filter-map filter-map)
(or query simple_query) (assoc :full-text
(->> (cond-> []
query (conj {:query query, :query_mode :query_string})
query (conj {:query prepared-query :query_mode :query_string})
simple_query (conj {:query_mode :simple_query_string
:query simple_query}))
(mapv #(merge % (when search_fields
Expand Down
20 changes: 13 additions & 7 deletions src/ctia/http/routes/crud.clj
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@
(-> {:search-query (-> {:date-field date-field
:params params}
add-search-extensions
search-query)
(search-query services))
:ident identity-map
:params (select-keys params routes.common/search-options)}
add-search-extensions))
Expand All @@ -398,7 +398,8 @@
(ok (-> (get-store entity)
(store/query-string-count
(search-query {:date-field date-field
:params params})
:params params}
services)
identity-map))))
(DELETE "/" []
:auth-identity identity
Expand All @@ -409,7 +410,8 @@
:summary (format "Delete %s entities matching given Lucene/ES query string or/and field filters" capitalized)
:query [params (add-flags-to-delete-search-query-params search-filters)]
(let [query (search-query {:date-field date-field
:params (dissoc params :wait_for :REALLY_DELETE_ALL_THESE_ENTITIES)})]
:params (dissoc params :wait_for :REALLY_DELETE_ALL_THESE_ENTITIES)}
services)]
(if (empty? query)
(forbidden {:error "you must provide at least one of from, to, query or any field filter."})
(ok
Expand Down Expand Up @@ -445,7 +447,8 @@
:created)
search-q (search-query {:date-field date-field
:params (st/select-schema params agg-search-schema)
:make-date-range-fn coerce-date-range})
:make-date-range-fn coerce-date-range}
services)
agg-q (assoc (st/select-schema params AverageParams)
:agg-type :avg)]
(-> (get-store entity)
Expand All @@ -464,7 +467,8 @@
(let [aggregate-on (keyword (:aggregate-on params))
search-q (search-query {:date-field aggregate-on
:params (st/select-schema params agg-search-schema)
:make-date-range-fn coerce-date-range})
:make-date-range-fn coerce-date-range}
services)
agg-q (assoc (st/select-schema params HistogramParams)
:agg-type :histogram)]
(-> (get-store entity)
Expand All @@ -483,7 +487,8 @@
(let [aggregate-on (:aggregate-on params)
search-q (search-query {:date-field date-field
:params (st/select-schema params agg-search-schema)
:make-date-range-fn coerce-date-range})
:make-date-range-fn coerce-date-range}
services)
agg-q (assoc (st/select-schema params TopnParams)
:agg-type :topn)]
(-> (get-store entity)
Expand All @@ -502,7 +507,8 @@
(let [aggregate-on (:aggregate-on params)
search-q (search-query {:date-field date-field
:params (st/select-schema params agg-search-schema)
:make-date-range-fn coerce-date-range})
:make-date-range-fn coerce-date-range}
services)
agg-q (assoc (st/select-schema params CardinalityParams)
:agg-type :cardinality)]
(-> (get-store entity)
Expand Down
9 changes: 5 additions & 4 deletions test/ctia/entity/incident_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@
(deftest sort-scores-test
(es-helpers/for-each-es-version
"Can sort by multiple scores"
[7]
[5 7]
#(ductile.index/delete! % "ctia_*")
(helpers/with-properties (-> ["ctia.auth.type" "allow-all"]
(into es-helpers/basic-auth-properties)
Expand Down Expand Up @@ -422,7 +422,7 @@
([{:keys [bench-atom]}]
(es-helpers/for-each-es-version
"severity sorts like #'ctim-severity-order"
[5 7]
[7]
#(ductile.index/delete! % "ctia_*")
(helpers/with-properties (into ["ctia.auth.type" "allow-all"]
es-helpers/basic-auth-properties)
Expand Down Expand Up @@ -491,8 +491,9 @@
(let [_ (when bench-atom
(println)
(println "Benchmarking..." (pr-str test-id)))
[{:keys [parsed-body] :as raw} ms-time] (result+ms-time
(search-th/search-raw app :incident search-params))
[{:keys [parsed-body] :as raw} ms-time]
(result+ms-time
(search-th/search-raw app :incident search-params))

expected-parsed-body (sort-by (fn [{:keys [severity] :as incident}]
{:post [(number? %)]}
Expand Down
69 changes: 57 additions & 12 deletions test/ctia/http/routes/common_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -111,43 +111,60 @@
(is (csu/contains-key? schema path)
(format "%s contains %s key" entity (name sf))))))))

;; Stub services map for the tests in this file: `:get-in-config` looks a
;; path up in a fixed config that only holds [:ctia :http :show], and
;; `:get-port` always answers 443.
(def http-show-services
  (let [config {:ctia {:http {:show {:protocol "http"
                                     :hostname "localhost"
                                     :port 3000}}}}
        lookup (fn [path] (get-in config path))]
    {:ConfigService {:get-in-config lookup}
     :CTIAHTTPServerService {:get-port (constantly 443)}}))

(deftest search-query-test
(with-redefs [sut/now (constantly #inst "2020-12-31")]
(let [from #inst "2020-04-01"
to #inst "2020-06-01"]
(is (= {:full-text [{:query "bad-domain", :query_mode :query_string}]}
(sut/search-query {:date-field :created
:params {:query "bad-domain"}})))
:params {:query "bad-domain"}}
http-show-services)))
(is (= {:range {:created
{:gte from
:lt to}}}
(sut/search-query {:date-field :created
:params {:from from
:to to}})))
:to to}}
http-show-services)))
(is (= {:range {:timestamp
{:gte from
:lt to}}}
(sut/search-query {:date-field :timestamp
:params {:from from, :to to}})))
:params {:from from, :to to}}
http-show-services)))
(is (= {:range {:created
{:lt to}}}
(sut/search-query {:date-field :created
:params {:to to}})))
:params {:to to}}
http-show-services)))
(is (= {:range {:created
{:gte from}}}
(sut/search-query {:date-field :created
:params {:from from}})))
:params {:from from}}
http-show-services)))
(is (= {:filter-map {:title "firefox exploit"
:disposition 2}}
(sut/search-query {:date-field :created
:params {:title "firefox exploit", :disposition 2}})))
:params {:title "firefox exploit", :disposition 2}}
http-show-services)))
(is (= {:full-text [{:query "bad-domain", :query_mode :query_string}]
:filter-map {:title "firefox exploit"
:disposition 2}}
(sut/search-query {:date-field :created
:params {:query "bad-domain"
:disposition 2
:title "firefox exploit"}})))
:title "firefox exploit"}}
http-show-services)))
(is (= {:full-text [{:query "bad-domain", :query_mode :query_string}]
:filter-map {:title "firefox exploit"
:disposition 2}}
Expand All @@ -157,7 +174,8 @@
:title "firefox exploit"
:fields ["title"]
:sort_by "disposition"
:sort_order :desc}})))
:sort_order :desc}}
http-show-services)))
(is (= {:full-text [{:query "bad-domain", :query_mode :query_string}]
:range {:created {:gte from, :lt to}}
:filter-map {:title "firefox exploit"
Expand All @@ -170,7 +188,8 @@
:title "firefox exploit"
:fields ["title"]
:sort_by "disposition"
:sort_order :desc}})))
:sort_order :desc}}
http-show-services)))
(is (= {:full-text [{:query "lucene"
:query_mode :query_string
:fields ["title"]}
Expand All @@ -186,7 +205,8 @@
:title "firefox exploit"
:search_fields ["title"]
:sort_by "disposition"
:sort_order :desc}}))
:sort_order :desc}}
http-show-services))
"query and simple_query can be both submitted and accepted")
(is (= {:full-text [{:query "simple"
:query_mode :simple_query_string
Expand All @@ -199,7 +219,8 @@
:title "firefox exploit"
:search_fields ["title"]
:sort_by "disposition"
:sort_order :desc}}))
:sort_order :desc}}
http-show-services))
"simple_query can be the only full text search")
(testing "make-date-range-fn should be properly called"
(is (= {:range {:timestamp
Expand All @@ -210,7 +231,8 @@
:make-date-range-fn
(fn [from to]
{:gte #inst "2050-01-01"
:lt #inst "2100-01-01"})})))))))
:lt #inst "2100-01-01"})}
http-show-services)))))))

(deftest format-agg-result-test
(let [from #inst "2019-01-01"
Expand Down Expand Up @@ -332,3 +354,26 @@
entity-store (get-in-config [:ctia :store entity])]
(assert (= "es" entity-store) (pr-str entity-store))
(crud-wait-for-test parameters)))))

(deftest rewrite-id-search-test
  ;; Each case is a pair: [submitted query, query expected after rewriting].
  (let [cases
        [;; single wildcard id, simple entity type
         ["source_ref:*casebook-aa8c5f29-11dd-433e-9a82-6b560a47a2cb"
          "source_ref:\"http://localhost:3000/ctia/casebook/casebook-aa8c5f29-11dd-433e-9a82-6b560a47a2cb\""]

         ;; hyphenated entity type
         ["source_ref:*asset-properties-aa8c5f29-11dd-433e-9a82-6b560a47a2cb"
          "source_ref:\"http://localhost:3000/ctia/asset-properties/asset-properties-aa8c5f29-11dd-433e-9a82-6b560a47a2cb\""]

         ;; two wildcard ids in the same query
         ["source_ref:*incident-aa8c5f29-11dd-433e-9a82-6b560a47a2cb AND a:*casebook-aa8c5f29-11dd-433e-9a82-6b560a47a2cb"
          "source_ref:\"http://localhost:3000/ctia/incident/incident-aa8c5f29-11dd-433e-9a82-6b560a47a2cb\" AND a:\"http://localhost:3000/ctia/casebook/casebook-aa8c5f29-11dd-433e-9a82-6b560a47a2cb\""]

         ;; a trailing wildcard is left alone, the leading one is rewritten
         ["source_ref:incident-aa8c5f29-11dd-433e-9a82-6b560a47a2cb* AND a:*casebook-aa8c5f29-11dd-433e-9a82-6b560a47a2cb"
          "source_ref:incident-aa8c5f29-11dd-433e-9a82-6b560a47a2cb* AND a:\"http://localhost:3000/ctia/casebook/casebook-aa8c5f29-11dd-433e-9a82-6b560a47a2cb\""]

         ;; bare wildcards are untouched
         ["source_ref:*" "source_ref:*"]
         ["*" "*"]]]
    (doseq [[query expected] cases]
      (is (= expected (sut/prepare-lucene-id-search query http-show-services))))))

0 comments on commit ac5291d

Please sign in to comment.