From 38f717c2adcf8a0ab618aad5e378ee2db822b4f0 Mon Sep 17 00:00:00 2001 From: Muhammad Abduh Date: Mon, 6 Dec 2021 17:14:12 +0700 Subject: [PATCH 1/6] feat(search): add search by field and support search by description field feat(search): use query string to search by field chore: comment out elasticsearch logger fix(discovery): update search by field query to match query chore(discovery): update swagger search api with searchby query param --- api/handlers/search_handler.go | 1 + cmd/serve.go | 6 + discovery/config.go | 3 + record/record.go | 7 + store/elasticsearch/search.go | 17 +- store/elasticsearch/search_test.go | 31 +++ .../testdata/search-test-fixture.json | 182 ++++++++++++++---- swagger.yaml | 4 + 8 files changed, 216 insertions(+), 35 deletions(-) diff --git a/api/handlers/search_handler.go b/api/handlers/search_handler.go index 843eb66b..74d8454d 100644 --- a/api/handlers/search_handler.go +++ b/api/handlers/search_handler.go @@ -77,6 +77,7 @@ func (handler *SearchHandler) buildSearchCfg(params url.Values) (cfg discovery.S cfg.MaxResults, _ = strconv.Atoi(params.Get("size")) cfg.Filters = filterConfigFromValues(params) cfg.RankBy = params.Get("rankby") + cfg.SearchByField = params.Get("searchby") cfg.TypeWhiteList, err = parseTypeWhiteList(params) return } diff --git a/cmd/serve.go b/cmd/serve.go index a1e5b7a5..1275e8bc 100644 --- a/cmd/serve.go +++ b/cmd/serve.go @@ -130,6 +130,12 @@ func initElasticsearch(config Config) *elasticsearch.Client { esClient, err := elasticsearch.NewClient(elasticsearch.Config{ Addresses: brokers, Transport: nrelasticsearch.NewRoundTripper(nil), + // uncomment below code to debug request and response to elasticsearch + // Logger: &estransport.ColorLogger{ + // Output: os.Stdout, + // EnableRequestBody: true, + // EnableResponseBody: true, + // }, }) if err != nil { log.Fatalf("error connecting to elasticsearch: %v", err) diff --git a/discovery/config.go b/discovery/config.go index 1c1f176d..0a985678 100644 --- a/discovery/config.go +++ b/discovery/config.go @@ -23,4 +23,7 @@ type SearchConfig struct { // RankBy is a param to rank based on a specific parameter RankBy string + + // SearchByField is a param to search a resource based on record's field + SearchByField string } diff --git a/record/record.go b/record/record.go index 1f376eb9..3b9f1ddb 100644 --- a/record/record.go +++ b/record/record.go @@ -14,6 +14,7 @@ type Record struct { Data map[string]interface{} `json:"data"` Labels map[string]string `json:"labels"` Tags []string `json:"tags"` + Owners []Owner `json:"owners"` Upstreams []LineageRecord `json:"upstreams"` Downstreams []LineageRecord `json:"downstreams"` CreatedAt time.Time `json:"created_at"` @@ -25,6 +26,12 @@ type LineageRecord struct { Type string `json:"type"` } +type Owner struct { + URN string `json:"urn"` + Name string `json:"name"` + Role string `json:"role"` +} + type ErrNoSuchRecord struct { RecordID string } diff --git a/store/elasticsearch/search.go b/store/elasticsearch/search.go index bc83f01c..c9ae83c0 100644 --- a/store/elasticsearch/search.go +++ b/store/elasticsearch/search.go @@ -164,7 +164,7 @@ func anyValidStringSlice(slices ...[]string) []string { func (sr *Searcher) buildQuery(ctx context.Context, cfg discovery.SearchConfig, indices []string) (io.Reader, error) { var query elastic.Query - query = sr.buildTextQuery(ctx, cfg.Text) + query = sr.buildTextQuery(ctx, cfg) query = sr.buildFilterQueries(query, cfg.Filters) query = sr.buildFunctionScoreQuery(query, cfg.RankBy) @@ -203,7 +203,20 @@ func (sr *Searcher) buildSuggestQuery(ctx context.Context, cfg discovery.SearchC return payload, err } -func (sr *Searcher) buildTextQuery(ctx context.Context, text string) elastic.Query { +func (sr *Searcher) buildTextQuery(ctx context.Context, cfg discovery.SearchConfig) elastic.Query { + if cfg.SearchByField == "" { + return sr.buildTextQueryGeneric(ctx, cfg.Text) + } else { + return sr.buildTextQueryByField(ctx, cfg.Text, cfg.SearchByField) + } +} + +func (sr *Searcher) buildTextQueryByField(ctx context.Context, text string, field string) elastic.Query { + return elastic.NewMatchQuery(field, text). + Fuzziness("AUTO") +} + +func (sr *Searcher) buildTextQueryGeneric(ctx context.Context, text string) elastic.Query { boostedFields := []string{ "urn^10", "name^5", diff --git a/store/elasticsearch/search_test.go b/store/elasticsearch/search_test.go index 663f4efd..a6f8316c 100644 --- a/store/elasticsearch/search_test.go +++ b/store/elasticsearch/search_test.go @@ -143,6 +143,37 @@ func TestSearcherSearch(t *testing.T) { {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-1"}, }, }, + { + Description: "should return order-topic resource if search by description field with text 'submitted order'", + Config: discovery.SearchConfig{ + Text: "submitted order", + SearchByField: "description", + }, + Expected: []expectedRow{ + {Type: "topic", RecordID: "order-topic"}, + {Type: "topic", RecordID: "purchase-topic"}, + }, + }, + { + Description: "should return 'bigquery::gcpproject/dataset/tablename-1' resource if search by owner name field with text 'unique'", + Config: discovery.SearchConfig{ + Text: "unique", + SearchByField: "owners.name", + }, + Expected: []expectedRow{ + {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-1"}, + }, + }, + { + Description: "should return 'bigquery::gcpproject/dataset/tablename-common' resource if search by table column name field with text 'tablename-common-column1'", + Config: discovery.SearchConfig{ + Text: "common", + SearchByField: "data.schema.columns.name", + }, + Expected: []expectedRow{ + {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-common"}, + }, + }, } for _, test := range tests { t.Run(test.Description, func(t *testing.T) { diff --git a/store/elasticsearch/testdata/search-test-fixture.json b/store/elasticsearch/testdata/search-test-fixture.json index 077debb4..6d7576ed 100644 --- a/store/elasticsearch/testdata/search-test-fixture.json +++ b/store/elasticsearch/testdata/search-test-fixture.json @@ -1,8 +1,6 @@ -[ - { +[{ "type": "topic", - "records": [ - { + "records": [{ "urn": "order-topic", "name": "order-topic", "service": "kafka", @@ -61,8 +59,7 @@ }, { "type": "table", - "records": [ - { + "records": [{ "urn": "au2-microsoft-invoice", "name": "microsoft-invoice", "service": "postgres", @@ -75,8 +72,7 @@ "country": "us", "description": "Transaction records for every microsoft purchase", "total_rows": 100, - "schema": [ - { + "schema": [{ "name": "id" }, { @@ -103,8 +99,7 @@ "country": "id", "description": "Transaction records for every Apple purchase", "total_rows": 100, - "schema": [ - { + "schema": [{ "name": "id" }, { @@ -126,15 +121,13 @@ "data": { "preview": {}, "profile": { - "common_join": [ - { - "conditions": [ - "ON target.column_1 = source.column_1 and target.column_3 = source.column_3 and DATE(target.event_timestamp) = DATE(source.event_timestamp)" - ], - "count": 1, - "urn": "bigquery::gcpproject/dataset/tablename-mid" - } - ], + "common_join": [{ + "conditions": [ + "ON target.column_1 = source.column_1 and target.column_3 = source.column_3 and DATE(target.event_timestamp) = DATE(source.event_timestamp)" + ], + "count": 1, + "urn": "bigquery::gcpproject/dataset/tablename-mid" + }], "filter_conditions": [ "WHERE t.column_5 = 'success' AND t.item_id = \"280481a2-2384-4b81-aa3e-214ac60b31db\" AND event_timestamp >= TIMESTAMP(\"2021-10-29\", \"UTC\") AND event_timestamp < TIMESTAMP(\"2021-11-22T02:01:06Z\")" ], @@ -152,12 +145,50 @@ "owner": "user_1" } }, + "schema": { + "columns": [{ + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-1-column1", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-1-column2", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "BIGNUMERIC", + "is_nullable": true, + "name": "tablename-1-column3", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + } + ] + }, "resource": { "name": "tablename-1", "service": "bigquery", "urn": "bigquery::gcpproject/dataset/tablename-1" } - } + }, + "owners": [{ + "urn": "owner::bq/unique", + "name": "Mrs Unique", + "role": "user" + }] }, { "urn": "bigquery::gcpproject/dataset/tablename-common", @@ -167,15 +198,13 @@ "data": { "preview": {}, "profile": { - "common_join": [ - { - "conditions": [ - "ON target.column_1 = source.column_1 and target.column_3 = source.column_3 and DATE(target.event_timestamp) = DATE(source.event_timestamp)" - ], - "count": 1, - "urn": "bigquery::gcpproject/dataset/tablename-mid" - } - ], + "common_join": [{ + "conditions": [ + "ON target.column_1 = source.column_1 and target.column_3 = source.column_3 and DATE(target.event_timestamp) = DATE(source.event_timestamp)" + ], + "count": 1, + "urn": "bigquery::gcpproject/dataset/tablename-mid" + }], "filter_conditions": [ "WHERE t.column_5 = 'success' AND t.item_id = \"280481a2-2384-4b81-aa3e-214ac60b31db\" AND event_timestamp >= TIMESTAMP(\"2021-10-29\", \"UTC\") AND event_timestamp < TIMESTAMP(\"2021-11-22T02:01:06Z\")" ], @@ -193,12 +222,56 @@ "owner": "user_1" } }, + "schema": { + "columns": [{ + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-common-column1", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-common-column2", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "BIGNUMERIC", + "is_nullable": true, + "name": "tablename-common-column3", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + } + ] + }, "resource": { "name": "tablename-common", "service": "bigquery", "urn": "bigquery::gcpproject/dataset/tablename-common" } - } + }, + "owners": [{ + "urn": "owner::bq/3", + "name": "Mr.X", + "role": "admin" + }, + { + "urn": "owner::bq/4", + "name": "Mrs.Y", + "role": "user" + } + ] }, { "urn": "bigquery::gcpproject/dataset/tablename-mid", @@ -208,8 +281,7 @@ "data": { "preview": {}, "profile": { - "common_join": [ - { + "common_join": [{ "conditions": [ "ON target.column_1 = source.column_1 and target.column_3 = source.column_3 and DATE(target.event_timestamp) = DATE(source.event_timestamp)" ], @@ -241,12 +313,56 @@ "owner": "user_1" } }, + "schema": { + "columns": [{ + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-mid-column1", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-mid-column2", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "BIGNUMERIC", + "is_nullable": true, + "name": "tablename-mid-column3", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + } + ] + }, "resource": { "name": "tablename-mid", "service": "bigquery", "urn": "bigquery::gcpproject/dataset/tablename-mid" } - } + }, + "owners": [{ + "urn": "owner::bq/1", + "name": "John Smith", + "role": "user" + }, + { + "urn": "owner::bq/2", + "name": "Paul Smith", + "role": "user" + } + ] } ] } diff --git a/swagger.yaml b/swagger.yaml index 3d5a1362..27e39c97 100644 --- a/swagger.yaml +++ b/swagger.yaml @@ -276,6 +276,10 @@ paths: name: "rankby" type: string description: 'descendingly sort based on a numeric field in the record. the nested field is written with period separated field name. eg, "data.profile.usage_count"' + - in: query + name: "searchby" + type: string + description: 'search on a specific records field. the nested field is written with period separated field name. eg, "data.schema.columns.name"' responses: 200: description: OK From eb882aec233e0045e832b57cd43cd53c82edcdbc Mon Sep 17 00:00:00 2001 From: Muhammad Abduh Date: Wed, 15 Dec 2021 19:14:59 +0700 Subject: [PATCH 2/6] feat(record): add new email field in owner struct --- record/record.go | 7 ++++--- store/elasticsearch/search_test.go | 10 ++++++++++ .../testdata/search-test-fixture.json | 15 ++++++++++----- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/record/record.go b/record/record.go index 3b9f1ddb..546ede18 100644 --- a/record/record.go +++ b/record/record.go @@ -27,9 +27,10 @@ type LineageRecord struct { } type Owner struct { - URN string `json:"urn"` - Name string `json:"name"` - Role string `json:"role"` + URN string `json:"urn"` + Name string `json:"name"` + Role string `json:"role"` + Email string `json:"email"` } type ErrNoSuchRecord struct { diff --git a/store/elasticsearch/search_test.go b/store/elasticsearch/search_test.go index a6f8316c..1327b32d 100644 --- a/store/elasticsearch/search_test.go +++ b/store/elasticsearch/search_test.go @@ -164,6 +164,16 @@ func TestSearcherSearch(t *testing.T) { {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-1"}, }, }, + { + Description: "should return 'bigquery::gcpproject/dataset/tablename-mid' resource if search by owner email field with text 'john.smith@email.com'", + Config: discovery.SearchConfig{ + Text: "johnsmith", + SearchByField: "owners.email", + }, + Expected: []expectedRow{ + {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-mid"}, + }, + }, { Description: "should return 'bigquery::gcpproject/dataset/tablename-common' resource if search by table column name field with text 'tablename-common-column1'", Config: discovery.SearchConfig{ diff --git a/store/elasticsearch/testdata/search-test-fixture.json b/store/elasticsearch/testdata/search-test-fixture.json index 6d7576ed..3f411fdc 100644 --- a/store/elasticsearch/testdata/search-test-fixture.json +++ b/store/elasticsearch/testdata/search-test-fixture.json @@ -187,7 +187,8 @@ "owners": [{ "urn": "owner::bq/unique", "name": "Mrs Unique", - "role": "user" + "role": "user", + "email": "mrs.unique@email.com" }] }, { @@ -264,12 +265,14 @@ "owners": [{ "urn": "owner::bq/3", "name": "Mr.X", - "role": "admin" + "role": "admin", + "email": "mr.x@email.com" }, { "urn": "owner::bq/4", "name": "Mrs.Y", - "role": "user" + "role": "user", + "email": "mr.y@email.com" } ] }, @@ -355,12 +358,14 @@ "owners": [{ "urn": "owner::bq/1", "name": "John Smith", - "role": "user" + "role": "user", + "email": "john.smith@email.com" }, { "urn": "owner::bq/2", "name": "Paul Smith", - "role": "user" + "role": "user", + "email": "paul.smith@email.com" } ] } From d7545a887990075b320ba27603a09b666f19cd6c Mon Sep 17 00:00:00 2001 From: Muhammad Abduh Date: Thu, 16 Dec 2021 15:07:54 +0700 Subject: [PATCH 3/6] feat(discovery): update search by field to accept map string --- api/handlers/search_handler.go | 4 +- api/handlers/search_handler_test.go | 33 ++++++++++++++- api/handlers/utils.go | 14 +++++++ discovery/config.go | 8 +++- store/elasticsearch/search.go | 25 +++++++++--- store/elasticsearch/search_test.go | 40 ++++++------------- .../testdata/search-test-fixture.json | 8 +++- .../elasticsearch/testutil/elastic_search.go | 11 ++--- 8 files changed, 101 insertions(+), 42 deletions(-) diff --git a/api/handlers/search_handler.go b/api/handlers/search_handler.go index 74d8454d..d4ee7a1f 100644 --- a/api/handlers/search_handler.go +++ b/api/handlers/search_handler.go @@ -15,6 +15,8 @@ import ( var ( filterPrefix = "filter." whiteListQueryParamKey = "filter.type" + + queryPrefix = "query." ) type SearchHandler struct { @@ -77,7 +79,7 @@ func (handler *SearchHandler) buildSearchCfg(params url.Values) (cfg discovery.S cfg.MaxResults, _ = strconv.Atoi(params.Get("size")) cfg.Filters = filterConfigFromValues(params) cfg.RankBy = params.Get("rankby") - cfg.SearchByField = params.Get("searchby") + cfg.Queries = queryConfigFromValues(params) cfg.TypeWhiteList, err = parseTypeWhiteList(params) return } diff --git a/api/handlers/search_handler_test.go b/api/handlers/search_handler_test.go index d80c206a..5da7da7e 100644 --- a/api/handlers/search_handler_test.go +++ b/api/handlers/search_handler_test.go @@ -58,6 +58,30 @@ func TestSearchHandlerSearch(t *testing.T) { "service": {"kafka", "rabbitmq"}, "data.landscape": {"th"}, }, + Queries: make(map[string]string), + } + + searcher.On("Search", ctx, cfg).Return([]discovery.SearchResult{}, nil) + }, + ValidateResponse: func(tc testCase, body io.Reader) error { + return nil + }, + }, + { + Title: "should pass queries to search config format", + Querystring: "text=resource&landscape=id,vn&filter.data.landscape=th&filter.type=topic&filter.service=kafka,rabbitmq&query.data.columns.name=timestamp&query.owners.email=john.doe@email.com", + InitSearcher: func(tc testCase, searcher *mock.RecordSearcher) { + cfg := discovery.SearchConfig{ + Text: "resource", + TypeWhiteList: []string{"topic"}, + Filters: map[string][]string{ + "service": {"kafka", "rabbitmq"}, + "data.landscape": {"th"}, + }, + Queries: map[string]string{ + "data.columns.name": "timestamp", + "owners.email": "john.doe@email.com", + }, } searcher.On("Search", ctx, cfg).Return([]discovery.SearchResult{}, nil) @@ -73,6 +97,7 @@ func TestSearchHandlerSearch(t *testing.T) { cfg := discovery.SearchConfig{ Text: "test", Filters: make(map[string][]string), + Queries: make(map[string]string), } response := []discovery.SearchResult{ { @@ -123,6 +148,7 @@ func TestSearchHandlerSearch(t *testing.T) { Text: "resource", MaxResults: 10, Filters: make(map[string][]string), + Queries: make(map[string]string), } var results []discovery.SearchResult @@ -221,6 +247,7 @@ func TestSearchHandlerSuggest(t *testing.T) { cfg := discovery.SearchConfig{ Text: "test", Filters: map[string][]string{}, + Queries: make(map[string]string), } searcher.On("Suggest", ctx, cfg).Return([]string{}, fmt.Errorf("service unavailable")) }, @@ -228,7 +255,7 @@ func TestSearchHandlerSuggest(t *testing.T) { }, { Title: "should pass filter to search config format", - Querystring: "text=resource&landscape=id,vn&filter.data.landscape=th&filter.type=topic&filter.service=kafka,rabbitmq", + Querystring: "text=resource&landscape=id,vn&query.description=this is my dashboard&filter.data.landscape=th&filter.type=topic&filter.service=kafka,rabbitmq", InitSearcher: func(tc testCase, searcher *mock.RecordSearcher) { cfg := discovery.SearchConfig{ Text: "resource", @@ -237,6 +264,9 @@ func TestSearchHandlerSuggest(t *testing.T) { "service": {"kafka", "rabbitmq"}, "data.landscape": {"th"}, }, + Queries: map[string]string{ + "description": "this is my dashboard", + }, } searcher.On("Suggest", ctx, cfg).Return([]string{}, nil) @@ -252,6 +282,7 @@ func TestSearchHandlerSuggest(t *testing.T) { cfg := discovery.SearchConfig{ Text: "test", Filters: make(map[string][]string), + Queries: make(map[string]string), } response := []string{ "test", diff --git a/api/handlers/utils.go b/api/handlers/utils.go index 128086e0..f93cc493 100644 --- a/api/handlers/utils.go +++ b/api/handlers/utils.go @@ -24,3 +24,17 @@ func filterConfigFromValues(querystring url.Values) map[string][]string { } return filter } + +func queryConfigFromValues(querystring url.Values) map[string]string { + var query = make(map[string]string) + for key, values := range querystring { + // filters are of form "query.{field}" + if !strings.HasPrefix(key, queryPrefix) { + continue + } + + queryKey := strings.TrimPrefix(key, queryPrefix) + query[queryKey] = values[0] // cannot have duplicate query key, always get the first one + } + return query +} diff --git a/discovery/config.go b/discovery/config.go index 0a985678..7a3b4c76 100644 --- a/discovery/config.go +++ b/discovery/config.go @@ -4,6 +4,10 @@ package discovery // criteria for operations involving record search type RecordFilter = map[string][]string +// RecordQuery is a param intended to be used as a match search +// criteria for operations involving record search +type RecordQuery = map[string]string + // SearchConfig represents a search query along // with any corresponding filter(s) type SearchConfig struct { @@ -24,6 +28,6 @@ type SearchConfig struct { // RankBy is a param to rank based on a specific parameter RankBy string - // SearchByField is a param to search a resource based on record's field - SearchByField string + // Queries is a param to search a resource based on record's fields + Queries RecordQuery } diff --git a/store/elasticsearch/search.go b/store/elasticsearch/search.go index c9ae83c0..2a73b4f4 100644 --- a/store/elasticsearch/search.go +++ b/store/elasticsearch/search.go @@ -204,16 +204,31 @@ func (sr *Searcher) buildSuggestQuery(ctx context.Context, cfg discovery.SearchC } func (sr *Searcher) buildTextQuery(ctx context.Context, cfg discovery.SearchConfig) elastic.Query { - if cfg.SearchByField == "" { + if len(cfg.Queries) == 0 { return sr.buildTextQueryGeneric(ctx, cfg.Text) } else { - return sr.buildTextQueryByField(ctx, cfg.Text, cfg.SearchByField) + return sr.buildTextQueryByField(ctx, cfg.Text, cfg.Queries) } } -func (sr *Searcher) buildTextQueryByField(ctx context.Context, text string, field string) elastic.Query { - return elastic.NewMatchQuery(field, text). - Fuzziness("AUTO") +func (sr *Searcher) buildTextQueryByField(ctx context.Context, text string, queries map[string]string) elastic.Query { + esQueries := []elastic.Query{ + elastic. + NewMultiMatchQuery( + text, + ). + Fuzziness("AUTO"), + } + + for field, value := range queries { + esQueries = append(esQueries, + elastic. + NewMatchQuery(field, value). + Fuzziness("AUTO")) + } + + return elastic.NewBoolQuery(). + Should(esQueries...) } func (sr *Searcher) buildTextQueryGeneric(ctx context.Context, text string) elastic.Query { diff --git a/store/elasticsearch/search_test.go b/store/elasticsearch/search_test.go index 1327b32d..a025746e 100644 --- a/store/elasticsearch/search_test.go +++ b/store/elasticsearch/search_test.go @@ -144,46 +144,32 @@ func TestSearcherSearch(t *testing.T) { }, }, { - Description: "should return order-topic resource if search by description field with text 'submitted order'", + Description: "should return consumer-topic if search by query description field with text 'customer update' and owners name 'johndoe'", Config: discovery.SearchConfig{ - Text: "submitted order", - SearchByField: "description", + Text: "consumer", + Queries: map[string]string{ + "description": "customer update", + "owners.name": "johndoe", + }, }, Expected: []expectedRow{ - {Type: "topic", RecordID: "order-topic"}, - {Type: "topic", RecordID: "purchase-topic"}, + {Type: "topic", RecordID: "consumer-topic"}, }, }, { - Description: "should return 'bigquery::gcpproject/dataset/tablename-1' resource if search by owner name field with text 'unique'", + Description: "should return 'bigquery::gcpproject/dataset/tablename-common' resource on top if search by query table column name field with text 'tablename-common-column1'", Config: discovery.SearchConfig{ - Text: "unique", - SearchByField: "owners.name", + Text: "tablename", + Queries: map[string]string{ + "data.schema.columns.name": "tablename-common-column1", + }, }, Expected: []expectedRow{ + {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-common"}, {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-1"}, - }, - }, - { - Description: "should return 'bigquery::gcpproject/dataset/tablename-mid' resource if search by owner email field with text 'john.smith@email.com'", - Config: discovery.SearchConfig{ - Text: "johnsmith", - SearchByField: "owners.email", - }, - Expected: []expectedRow{ {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-mid"}, }, }, - { - Description: "should return 'bigquery::gcpproject/dataset/tablename-common' resource if search by table column name field with text 'tablename-common-column1'", - Config: discovery.SearchConfig{ - Text: "common", - SearchByField: "data.schema.columns.name", - }, - Expected: []expectedRow{ - {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-common"}, - }, - }, } for _, test := range tests { t.Run(test.Description, func(t *testing.T) { diff --git a/store/elasticsearch/testdata/search-test-fixture.json b/store/elasticsearch/testdata/search-test-fixture.json index 3f411fdc..cbf261ea 100644 --- a/store/elasticsearch/testdata/search-test-fixture.json +++ b/store/elasticsearch/testdata/search-test-fixture.json @@ -40,7 +40,13 @@ "environment": "production", "country": "id", "partition": 50 - } + }, + "owners": [{ + "urn": "owner::topic/1", + "name": "John Doe", + "role": "user", + "email": "john.doe@email.com" + }] }, { "urn": "transaction", diff --git a/store/elasticsearch/testutil/elastic_search.go b/store/elasticsearch/testutil/elastic_search.go index d47df3d8..723b7fe5 100644 --- a/store/elasticsearch/testutil/elastic_search.go +++ b/store/elasticsearch/testutil/elastic_search.go @@ -10,6 +10,7 @@ import ( "time" "github.com/elastic/go-elasticsearch/v7" + "github.com/elastic/go-elasticsearch/v7/estransport" ) var ( @@ -95,11 +96,11 @@ func NewElasticsearchTestServer() *ElasticsearchTestServer { server.url.String(), }, // uncomment below code to debug request and response to elasticsearch - // Logger: &estransport.ColorLogger{ - // Output: os.Stdout, - // EnableRequestBody: true, - // EnableResponseBody: true, - // }, + Logger: &estransport.ColorLogger{ + Output: os.Stdout, + EnableRequestBody: true, + EnableResponseBody: true, + }, }, ) if err != nil { From 4a539483e29b7702bf065fce5e6756f32ab3ea4f Mon Sep 17 00:00:00 2001 From: Muhammad Abduh Date: Thu, 16 Dec 2021 15:32:57 +0700 Subject: [PATCH 4/6] feat(discovery): add test to filter by a field --- store/elasticsearch/search_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/store/elasticsearch/search_test.go b/store/elasticsearch/search_test.go index a025746e..47e52b5b 100644 --- a/store/elasticsearch/search_test.go +++ b/store/elasticsearch/search_test.go @@ -131,6 +131,18 @@ func TestSearcherSearch(t *testing.T) { {Type: "topic", RecordID: "consumer-topic"}, }, }, + { + Description: "should return 'consumer-topic' if filter owner email with 'john.doe@email.com'", + Config: discovery.SearchConfig{ + Text: "topic", + Filters: map[string][]string{ + "owners.email.keyword": {"john.doe@email.com"}, + }, + }, + Expected: []expectedRow{ + {Type: "topic", RecordID: "consumer-topic"}, + }, + }, { Description: "should return a descendingly sorted based on usage count in search results if rank by usage in the config", Config: discovery.SearchConfig{ From 7ab1505c7e0b6601d1a69594cc7235730a4dc68e Mon Sep 17 00:00:00 2001 From: Muhammad Abduh Date: Thu, 16 Dec 2021 16:25:40 +0700 Subject: [PATCH 5/6] fix(discovery): append keyword to filter field --- store/elasticsearch/search.go | 1 + store/elasticsearch/search_test.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/store/elasticsearch/search.go b/store/elasticsearch/search.go index 2a73b4f4..1e06d499 100644 --- a/store/elasticsearch/search.go +++ b/store/elasticsearch/search.go @@ -274,6 +274,7 @@ func (sr *Searcher) buildFilterQueries(query elastic.Query, filters map[string][ values = append(values, rawVal) } + key := fmt.Sprintf("%s.keyword", key) filterQueries = append( filterQueries, elastic.NewTermsQuery(key, values...), diff --git a/store/elasticsearch/search_test.go b/store/elasticsearch/search_test.go index 47e52b5b..ff77cc3c 100644 --- a/store/elasticsearch/search_test.go +++ b/store/elasticsearch/search_test.go @@ -136,7 +136,7 @@ func TestSearcherSearch(t *testing.T) { Config: discovery.SearchConfig{ Text: "topic", Filters: map[string][]string{ - "owners.email.keyword": {"john.doe@email.com"}, + "owners.email": {"john.doe@email.com"}, }, }, Expected: []expectedRow{ From dbd4a85f4eb3d3a6b7fe6a627c86f5db858e9903 Mon Sep 17 00:00:00 2001 From: Muhammad Abduh Date: Thu, 16 Dec 2021 19:25:57 +0700 Subject: [PATCH 6/6] fix(discovery): add filter match query --- discovery/config.go | 6 +- store/elasticsearch/search.go | 55 ++++++++----------- store/elasticsearch/search_test.go | 14 +++-- .../testdata/search-test-fixture.json | 24 +++++++- .../elasticsearch/testutil/elastic_search.go | 11 ++-- 5 files changed, 59 insertions(+), 51 deletions(-) diff --git a/discovery/config.go b/discovery/config.go index 7a3b4c76..a7bb538e 100644 --- a/discovery/config.go +++ b/discovery/config.go @@ -4,10 +4,6 @@ package discovery // criteria for operations involving record search type RecordFilter = map[string][]string -// RecordQuery is a param intended to be used as a match search -// criteria for operations involving record search -type RecordQuery = map[string]string - // SearchConfig represents a search query along // with any corresponding filter(s) type SearchConfig struct { @@ -29,5 +25,5 @@ type SearchConfig struct { RankBy string // Queries is a param to search a resource based on record's fields - Queries RecordQuery + Queries map[string]string } diff --git a/store/elasticsearch/search.go b/store/elasticsearch/search.go index 1e06d499..06ef68b7 100644 --- a/store/elasticsearch/search.go +++ b/store/elasticsearch/search.go @@ -164,8 +164,9 @@ func anyValidStringSlice(slices ...[]string) []string { func (sr *Searcher) buildQuery(ctx context.Context, cfg discovery.SearchConfig, indices []string) (io.Reader, error) { var query elastic.Query - query = sr.buildTextQuery(ctx, cfg) - query = sr.buildFilterQueries(query, cfg.Filters) + query = sr.buildTextQuery(ctx, cfg.Text) + query = sr.buildFilterTermQueries(query, cfg.Filters) + query = sr.buildFilterMatchQueries(ctx, query, cfg.Queries) query = sr.buildFunctionScoreQuery(query, cfg.RankBy) src, err := query.Source() @@ -203,35 +204,7 @@ func (sr *Searcher) buildSuggestQuery(ctx context.Context, cfg discovery.SearchC return payload, err } -func (sr *Searcher) buildTextQuery(ctx context.Context, cfg discovery.SearchConfig) elastic.Query { - if len(cfg.Queries) == 0 { - return sr.buildTextQueryGeneric(ctx, cfg.Text) - } else { - return sr.buildTextQueryByField(ctx, cfg.Text, cfg.Queries) - } -} - -func (sr *Searcher) buildTextQueryByField(ctx context.Context, text string, queries map[string]string) elastic.Query { - esQueries := []elastic.Query{ - elastic. - NewMultiMatchQuery( - text, - ). - Fuzziness("AUTO"), - } - - for field, value := range queries { - esQueries = append(esQueries, - elastic. - NewMatchQuery(field, value). - Fuzziness("AUTO")) - } - - return elastic.NewBoolQuery(). - Should(esQueries...) -} - -func (sr *Searcher) buildTextQueryGeneric(ctx context.Context, text string) elastic.Query { +func (sr *Searcher) buildTextQuery(ctx context.Context, text string) elastic.Query { boostedFields := []string{ "urn^10", "name^5", @@ -258,7 +231,25 @@ func (sr *Searcher) buildTextQueryGeneric(ctx context.Context, text string) elas ) } -func (sr *Searcher) buildFilterQueries(query elastic.Query, filters map[string][]string) elastic.Query { +func (sr *Searcher) buildFilterMatchQueries(ctx context.Context, query elastic.Query, queries map[string]string) elastic.Query { + if len(queries) == 0 { + return query + } + + esQueries := []elastic.Query{} + for field, value := range queries { + esQueries = append(esQueries, + elastic. + NewMatchQuery(field, value). + Fuzziness("AUTO")) + } + + return elastic.NewBoolQuery(). + Should(query). + Filter(esQueries...) +} + +func (sr *Searcher) buildFilterTermQueries(query elastic.Query, filters map[string][]string) elastic.Query { if len(filters) == 0 { return query } diff --git a/store/elasticsearch/search_test.go b/store/elasticsearch/search_test.go index ff77cc3c..07a091fe 100644 --- a/store/elasticsearch/search_test.go +++ b/store/elasticsearch/search_test.go @@ -67,6 +67,7 @@ func TestSearcherSearch(t *testing.T) { {Type: "topic", RecordID: "order-topic"}, {Type: "topic", RecordID: "purchase-topic"}, {Type: "topic", RecordID: "consumer-topic"}, + {Type: "topic", RecordID: "consumer-mq-2"}, }, }, { @@ -78,6 +79,7 @@ func TestSearcherSearch(t *testing.T) { {Type: "topic", RecordID: "order-topic"}, {Type: "topic", RecordID: "purchase-topic"}, {Type: "topic", RecordID: "consumer-topic"}, + {Type: "topic", RecordID: "consumer-mq-2"}, }, }, { @@ -115,6 +117,7 @@ func TestSearcherSearch(t *testing.T) { Expected: []expectedRow{ {Type: "topic", RecordID: "order-topic"}, {Type: "topic", RecordID: "consumer-topic"}, + {Type: "topic", RecordID: "consumer-mq-2"}, }, }, { @@ -129,6 +132,7 @@ func TestSearcherSearch(t *testing.T) { }, Expected: []expectedRow{ {Type: "topic", RecordID: "consumer-topic"}, + {Type: "topic", RecordID: "consumer-mq-2"}, }, }, { @@ -156,12 +160,12 @@ func TestSearcherSearch(t *testing.T) { }, }, { - Description: "should return consumer-topic if search by query description field with text 'customer update' and owners name 'johndoe'", + Description: "should return consumer-topic if search by query description field with text 'rabbitmq' and owners name 'johndoe'", Config: discovery.SearchConfig{ Text: "consumer", Queries: map[string]string{ - "description": "customer update", - "owners.name": "johndoe", + "description": "rabbitmq", + "owners.name": "john doe", }, }, Expected: []expectedRow{ @@ -173,13 +177,11 @@ func TestSearcherSearch(t *testing.T) { Config: discovery.SearchConfig{ Text: "tablename", Queries: map[string]string{ - "data.schema.columns.name": "tablename-common-column1", + "data.schema.columns.name": "common", }, }, Expected: []expectedRow{ {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-common"}, - {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-1"}, - {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-mid"}, }, }, } diff --git a/store/elasticsearch/testdata/search-test-fixture.json b/store/elasticsearch/testdata/search-test-fixture.json index cbf261ea..14787e44 100644 --- a/store/elasticsearch/testdata/search-test-fixture.json +++ b/store/elasticsearch/testdata/search-test-fixture.json @@ -32,10 +32,10 @@ "urn": "consumer-topic", "name": "consumer-topic", "service": "rabbitmq", - "description": "Update on every customer creation/update", + "description": "Update on every rabbitmq customer creation/update", "data": { "topic_name": "consumer-topic", - "description": "Update on every customer creation/update", + "description": "Update on every rabbitmq customer creation/update", "company": "odpf", "environment": "production", "country": "id", @@ -48,6 +48,26 @@ "email": "john.doe@email.com" }] }, + { + "urn": "consumer-mq-2", + "name": "consumer-mq-2", + "service": "rabbitmq", + "description": "Another rabbitmq topic", + "data": { + "topic_name": "consumer-mq-2", + "description": "Another rabbitmq topic", + "company": "odpf", + "environment": "production", + "country": "id", + "partition": 50 + }, + "owners": [{ + "urn": "owner::topic/22", + "name": "Mary Jane", + "role": "user", + "email": "mary.jane@email.com" + }] + }, { "urn": "transaction", "name": "transaction", diff --git a/store/elasticsearch/testutil/elastic_search.go b/store/elasticsearch/testutil/elastic_search.go index 723b7fe5..d47df3d8 100644 --- a/store/elasticsearch/testutil/elastic_search.go +++ b/store/elasticsearch/testutil/elastic_search.go @@ -10,7 +10,6 @@ import ( "time" "github.com/elastic/go-elasticsearch/v7" - "github.com/elastic/go-elasticsearch/v7/estransport" ) var ( @@ -96,11 +95,11 @@ func NewElasticsearchTestServer() *ElasticsearchTestServer { server.url.String(), }, // uncomment below code to debug request and response to elasticsearch - Logger: &estransport.ColorLogger{ - Output: os.Stdout, - EnableRequestBody: true, - EnableResponseBody: true, - }, + // Logger: &estransport.ColorLogger{ + // Output: os.Stdout, + // EnableRequestBody: true, + // EnableResponseBody: true, + // }, }, ) if err != nil {