diff --git a/api/handlers/search_handler.go b/api/handlers/search_handler.go index 843eb66b..d4ee7a1f 100644 --- a/api/handlers/search_handler.go +++ b/api/handlers/search_handler.go @@ -15,6 +15,8 @@ import ( var ( filterPrefix = "filter." whiteListQueryParamKey = "filter.type" + + queryPrefix = "query." ) type SearchHandler struct { @@ -77,6 +79,7 @@ func (handler *SearchHandler) buildSearchCfg(params url.Values) (cfg discovery.S cfg.MaxResults, _ = strconv.Atoi(params.Get("size")) cfg.Filters = filterConfigFromValues(params) cfg.RankBy = params.Get("rankby") + cfg.Queries = queryConfigFromValues(params) cfg.TypeWhiteList, err = parseTypeWhiteList(params) return } diff --git a/api/handlers/search_handler_test.go b/api/handlers/search_handler_test.go index d80c206a..5da7da7e 100644 --- a/api/handlers/search_handler_test.go +++ b/api/handlers/search_handler_test.go @@ -58,6 +58,30 @@ func TestSearchHandlerSearch(t *testing.T) { "service": {"kafka", "rabbitmq"}, "data.landscape": {"th"}, }, + Queries: make(map[string]string), + } + + searcher.On("Search", ctx, cfg).Return([]discovery.SearchResult{}, nil) + }, + ValidateResponse: func(tc testCase, body io.Reader) error { + return nil + }, + }, + { + Title: "should pass queries to search config format", + Querystring: "text=resource&landscape=id,vn&filter.data.landscape=th&filter.type=topic&filter.service=kafka,rabbitmq&query.data.columns.name=timestamp&query.owners.email=john.doe@email.com", + InitSearcher: func(tc testCase, searcher *mock.RecordSearcher) { + cfg := discovery.SearchConfig{ + Text: "resource", + TypeWhiteList: []string{"topic"}, + Filters: map[string][]string{ + "service": {"kafka", "rabbitmq"}, + "data.landscape": {"th"}, + }, + Queries: map[string]string{ + "data.columns.name": "timestamp", + "owners.email": "john.doe@email.com", + }, } searcher.On("Search", ctx, cfg).Return([]discovery.SearchResult{}, nil) @@ -73,6 +97,7 @@ func TestSearchHandlerSearch(t *testing.T) { cfg := discovery.SearchConfig{ Text: "test", Filters: make(map[string][]string), + Queries: make(map[string]string), } response := []discovery.SearchResult{ { @@ -123,6 +148,7 @@ func TestSearchHandlerSearch(t *testing.T) { Text: "resource", MaxResults: 10, Filters: make(map[string][]string), + Queries: make(map[string]string), } var results []discovery.SearchResult @@ -221,6 +247,7 @@ func TestSearchHandlerSuggest(t *testing.T) { cfg := discovery.SearchConfig{ Text: "test", Filters: map[string][]string{}, + Queries: make(map[string]string), } searcher.On("Suggest", ctx, cfg).Return([]string{}, fmt.Errorf("service unavailable")) }, @@ -228,7 +255,7 @@ func TestSearchHandlerSuggest(t *testing.T) { }, { Title: "should pass filter to search config format", - Querystring: "text=resource&landscape=id,vn&filter.data.landscape=th&filter.type=topic&filter.service=kafka,rabbitmq", + Querystring: "text=resource&landscape=id,vn&query.description=this is my dashboard&filter.data.landscape=th&filter.type=topic&filter.service=kafka,rabbitmq", InitSearcher: func(tc testCase, searcher *mock.RecordSearcher) { cfg := discovery.SearchConfig{ Text: "resource", @@ -237,6 +264,9 @@ func TestSearchHandlerSuggest(t *testing.T) { "service": {"kafka", "rabbitmq"}, "data.landscape": {"th"}, }, + Queries: map[string]string{ + "description": "this is my dashboard", + }, } searcher.On("Suggest", ctx, cfg).Return([]string{}, nil) @@ -252,6 +282,7 @@ func TestSearchHandlerSuggest(t *testing.T) { cfg := discovery.SearchConfig{ Text: "test", Filters: make(map[string][]string), + Queries: make(map[string]string), } response := []string{ "test", diff --git a/api/handlers/utils.go b/api/handlers/utils.go index 128086e0..f93cc493 100644 --- a/api/handlers/utils.go +++ b/api/handlers/utils.go @@ -24,3 +24,17 @@ func filterConfigFromValues(querystring url.Values) map[string][]string { } return filter } + +func queryConfigFromValues(querystring url.Values) map[string]string { + var query = make(map[string]string) + for key, values := range querystring { + // filters are of form "query.{field}" + if !strings.HasPrefix(key, queryPrefix) { + continue + } + + queryKey := strings.TrimPrefix(key, queryPrefix) + query[queryKey] = values[0] // cannot have duplicate query key, always get the first one + } + return query +} diff --git a/cmd/serve.go b/cmd/serve.go index a1e5b7a5..1275e8bc 100644 --- a/cmd/serve.go +++ b/cmd/serve.go @@ -130,6 +130,12 @@ func initElasticsearch(config Config) *elasticsearch.Client { esClient, err := elasticsearch.NewClient(elasticsearch.Config{ Addresses: brokers, Transport: nrelasticsearch.NewRoundTripper(nil), + // uncomment below code to debug request and response to elasticsearch + // Logger: &estransport.ColorLogger{ + // Output: os.Stdout, + // EnableRequestBody: true, + // EnableResponseBody: true, + // }, }) if err != nil { log.Fatalf("error connecting to elasticsearch: %v", err) diff --git a/discovery/config.go b/discovery/config.go index 1c1f176d..a7bb538e 100644 --- a/discovery/config.go +++ b/discovery/config.go @@ -23,4 +23,7 @@ type SearchConfig struct { // RankBy is a param to rank based on a specific parameter RankBy string + + // Queries is a param to search a resource based on record's fields + Queries map[string]string } diff --git a/record/record.go b/record/record.go index 1f376eb9..546ede18 100644 --- a/record/record.go +++ b/record/record.go @@ -14,6 +14,7 @@ type Record struct { Data map[string]interface{} `json:"data"` Labels map[string]string `json:"labels"` Tags []string `json:"tags"` + Owners []Owner `json:"owners"` Upstreams []LineageRecord `json:"upstreams"` Downstreams []LineageRecord `json:"downstreams"` CreatedAt time.Time `json:"created_at"` @@ -25,6 +26,13 @@ type LineageRecord struct { Type string `json:"type"` } +type Owner struct { + URN string `json:"urn"` + Name string `json:"name"` + Role string `json:"role"` + Email string `json:"email"` +} + type ErrNoSuchRecord struct { RecordID string } diff --git a/store/elasticsearch/search.go b/store/elasticsearch/search.go index bc83f01c..06ef68b7 100644 --- a/store/elasticsearch/search.go +++ b/store/elasticsearch/search.go @@ -165,7 +165,8 @@ func (sr *Searcher) buildQuery(ctx context.Context, cfg discovery.SearchConfig, var query elastic.Query query = sr.buildTextQuery(ctx, cfg.Text) - query = sr.buildFilterQueries(query, cfg.Filters) + query = sr.buildFilterTermQueries(query, cfg.Filters) + query = sr.buildFilterMatchQueries(ctx, query, cfg.Queries) query = sr.buildFunctionScoreQuery(query, cfg.RankBy) src, err := query.Source() @@ -230,7 +231,25 @@ func (sr *Searcher) buildTextQuery(ctx context.Context, text string) elastic.Que ) } -func (sr *Searcher) buildFilterQueries(query elastic.Query, filters map[string][]string) elastic.Query { +func (sr *Searcher) buildFilterMatchQueries(ctx context.Context, query elastic.Query, queries map[string]string) elastic.Query { + if len(queries) == 0 { + return query + } + + esQueries := []elastic.Query{} + for field, value := range queries { + esQueries = append(esQueries, + elastic. + NewMatchQuery(field, value). + Fuzziness("AUTO")) + } + + return elastic.NewBoolQuery(). + Should(query). + Filter(esQueries...) +} + +func (sr *Searcher) buildFilterTermQueries(query elastic.Query, filters map[string][]string) elastic.Query { if len(filters) == 0 { return query } @@ -246,6 +265,7 @@ func (sr *Searcher) buildFilterQueries(query elastic.Query, filters map[string][ values = append(values, rawVal) } + key := fmt.Sprintf("%s.keyword", key) filterQueries = append( filterQueries, elastic.NewTermsQuery(key, values...), diff --git a/store/elasticsearch/search_test.go b/store/elasticsearch/search_test.go index 663f4efd..07a091fe 100644 --- a/store/elasticsearch/search_test.go +++ b/store/elasticsearch/search_test.go @@ -67,6 +67,7 @@ func TestSearcherSearch(t *testing.T) { {Type: "topic", RecordID: "order-topic"}, {Type: "topic", RecordID: "purchase-topic"}, {Type: "topic", RecordID: "consumer-topic"}, + {Type: "topic", RecordID: "consumer-mq-2"}, }, }, { @@ -78,6 +79,7 @@ func TestSearcherSearch(t *testing.T) { {Type: "topic", RecordID: "order-topic"}, {Type: "topic", RecordID: "purchase-topic"}, {Type: "topic", RecordID: "consumer-topic"}, + {Type: "topic", RecordID: "consumer-mq-2"}, }, }, { @@ -115,6 +117,7 @@ func TestSearcherSearch(t *testing.T) { Expected: []expectedRow{ {Type: "topic", RecordID: "order-topic"}, {Type: "topic", RecordID: "consumer-topic"}, + {Type: "topic", RecordID: "consumer-mq-2"}, }, }, { @@ -127,6 +130,19 @@ func TestSearcherSearch(t *testing.T) { "data.company": {"odpf"}, }, }, + Expected: []expectedRow{ + {Type: "topic", RecordID: "consumer-topic"}, + {Type: "topic", RecordID: "consumer-mq-2"}, + }, + }, + { + Description: "should return 'consumer-topic' if filter owner email with 'john.doe@email.com'", + Config: discovery.SearchConfig{ + Text: "topic", + Filters: map[string][]string{ + "owners.email": {"john.doe@email.com"}, + }, + }, Expected: []expectedRow{ {Type: "topic", RecordID: "consumer-topic"}, }, @@ -143,6 +159,31 @@ func TestSearcherSearch(t *testing.T) { {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-1"}, }, }, + { + Description: "should return consumer-topic if search by query description field with text 'rabbitmq' and owners name 'johndoe'", + Config: discovery.SearchConfig{ + Text: "consumer", + Queries: map[string]string{ + "description": "rabbitmq", + "owners.name": "john doe", + }, + }, + Expected: []expectedRow{ + {Type: "topic", RecordID: "consumer-topic"}, + }, + }, + { + Description: "should return 'bigquery::gcpproject/dataset/tablename-common' resource on top if search by query table column name field with text 'tablename-common-column1'", + Config: discovery.SearchConfig{ + Text: "tablename", + Queries: map[string]string{ + "data.schema.columns.name": "common", + }, + }, + Expected: []expectedRow{ + {Type: "table", RecordID: "bigquery::gcpproject/dataset/tablename-common"}, + }, + }, } for _, test := range tests { t.Run(test.Description, func(t *testing.T) { diff --git a/store/elasticsearch/testdata/search-test-fixture.json b/store/elasticsearch/testdata/search-test-fixture.json index 077debb4..14787e44 100644 --- a/store/elasticsearch/testdata/search-test-fixture.json +++ b/store/elasticsearch/testdata/search-test-fixture.json @@ -1,8 +1,6 @@ -[ - { +[{ "type": "topic", - "records": [ - { + "records": [{ "urn": "order-topic", "name": "order-topic", "service": "kafka", @@ -34,15 +32,41 @@ "urn": "consumer-topic", "name": "consumer-topic", "service": "rabbitmq", - "description": "Update on every customer creation/update", + "description": "Update on every rabbitmq customer creation/update", "data": { "topic_name": "consumer-topic", - "description": "Update on every customer creation/update", + "description": "Update on every rabbitmq customer creation/update", "company": "odpf", "environment": "production", "country": "id", "partition": 50 - } + }, + "owners": [{ + "urn": "owner::topic/1", + "name": "John Doe", + "role": "user", + "email": "john.doe@email.com" + }] + }, + { + "urn": "consumer-mq-2", + "name": "consumer-mq-2", + "service": "rabbitmq", + "description": "Another rabbitmq topic", + "data": { + "topic_name": "consumer-mq-2", + "description": "Another rabbitmq topic", + "company": "odpf", + "environment": "production", + "country": "id", + "partition": 50 + }, + "owners": [{ + "urn": "owner::topic/22", + "name": "Mary Jane", + "role": "user", + "email": "mary.jane@email.com" + }] }, { "urn": "transaction", @@ -61,8 +85,7 @@ }, { "type": "table", - "records": [ - { + "records": [{ "urn": "au2-microsoft-invoice", "name": "microsoft-invoice", "service": "postgres", @@ -75,8 +98,7 @@ "country": "us", "description": "Transaction records for every microsoft purchase", "total_rows": 100, - "schema": [ - { + "schema": [{ "name": "id" }, { @@ -103,8 +125,7 @@ "country": "id", "description": "Transaction records for every Apple purchase", "total_rows": 100, - "schema": [ - { + "schema": [{ "name": "id" }, { @@ -126,15 +147,13 @@ "data": { "preview": {}, "profile": { - "common_join": [ - { - "conditions": [ - "ON target.column_1 = source.column_1 and target.column_3 = source.column_3 and DATE(target.event_timestamp) = DATE(source.event_timestamp)" - ], - "count": 1, - "urn": "bigquery::gcpproject/dataset/tablename-mid" - } - ], + "common_join": [{ + "conditions": [ + "ON target.column_1 = source.column_1 and target.column_3 = source.column_3 and DATE(target.event_timestamp) = DATE(source.event_timestamp)" + ], + "count": 1, + "urn": "bigquery::gcpproject/dataset/tablename-mid" + }], "filter_conditions": [ "WHERE t.column_5 = 'success' AND t.item_id = \"280481a2-2384-4b81-aa3e-214ac60b31db\" AND event_timestamp >= TIMESTAMP(\"2021-10-29\", \"UTC\") AND event_timestamp < TIMESTAMP(\"2021-11-22T02:01:06Z\")" ], @@ -152,12 +171,51 @@ "owner": "user_1" } }, + "schema": { + "columns": [{ + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-1-column1", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-1-column2", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "BIGNUMERIC", + "is_nullable": true, + "name": "tablename-1-column3", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + } + ] + }, "resource": { "name": "tablename-1", "service": "bigquery", "urn": "bigquery::gcpproject/dataset/tablename-1" } - } + }, + "owners": [{ + "urn": "owner::bq/unique", + "name": "Mrs Unique", + "role": "user", + "email": "mrs.unique@email.com" + }] }, { "urn": "bigquery::gcpproject/dataset/tablename-common", @@ -167,15 +225,13 @@ "data": { "preview": {}, "profile": { - "common_join": [ - { - "conditions": [ - "ON target.column_1 = source.column_1 and target.column_3 = source.column_3 and DATE(target.event_timestamp) = DATE(source.event_timestamp)" - ], - "count": 1, - "urn": "bigquery::gcpproject/dataset/tablename-mid" - } - ], + "common_join": [{ + "conditions": [ + "ON target.column_1 = source.column_1 and target.column_3 = source.column_3 and DATE(target.event_timestamp) = DATE(source.event_timestamp)" + ], + "count": 1, + "urn": "bigquery::gcpproject/dataset/tablename-mid" + }], "filter_conditions": [ "WHERE t.column_5 = 'success' AND t.item_id = \"280481a2-2384-4b81-aa3e-214ac60b31db\" AND event_timestamp >= TIMESTAMP(\"2021-10-29\", \"UTC\") AND event_timestamp < TIMESTAMP(\"2021-11-22T02:01:06Z\")" ], @@ -193,12 +249,58 @@ "owner": "user_1" } }, + "schema": { + "columns": [{ + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-common-column1", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-common-column2", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "BIGNUMERIC", + "is_nullable": true, + "name": "tablename-common-column3", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + } + ] + }, "resource": { "name": "tablename-common", "service": "bigquery", "urn": "bigquery::gcpproject/dataset/tablename-common" } - } + }, + "owners": [{ + "urn": "owner::bq/3", + "name": "Mr.X", + "role": "admin", + "email": "mr.x@email.com" + }, + { + "urn": "owner::bq/4", + "name": "Mrs.Y", + "role": "user", + "email": "mr.y@email.com" + } + ] }, { "urn": "bigquery::gcpproject/dataset/tablename-mid", @@ -208,8 +310,7 @@ "data": { "preview": {}, "profile": { - "common_join": [ - { + "common_join": [{ "conditions": [ "ON target.column_1 = source.column_1 and target.column_3 = source.column_3 and DATE(target.event_timestamp) = DATE(source.event_timestamp)" ], @@ -241,12 +342,58 @@ "owner": "user_1" } }, + "schema": { + "columns": [{ + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-mid-column1", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "STRING", + "is_nullable": true, + "name": "tablename-mid-column2", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + }, + { + "data_type": "BIGNUMERIC", + "is_nullable": true, + "name": "tablename-mid-column3", + "properties": { + "attributes": { + "mode": "NULLABLE" + } + } + } + ] + }, "resource": { "name": "tablename-mid", "service": "bigquery", "urn": "bigquery::gcpproject/dataset/tablename-mid" } - } + }, + "owners": [{ + "urn": "owner::bq/1", + "name": "John Smith", + "role": "user", + "email": "john.smith@email.com" + }, + { + "urn": "owner::bq/2", + "name": "Paul Smith", + "role": "user", + "email": "paul.smith@email.com" + } + ] } ] } diff --git a/swagger.yaml b/swagger.yaml index 3d5a1362..27e39c97 100644 --- a/swagger.yaml +++ b/swagger.yaml @@ -276,6 +276,10 @@ paths: name: "rankby" type: string description: 'descendingly sort based on a numeric field in the record. the nested field is written with period separated field name. eg, "data.profile.usage_count"' + - in: query + name: "searchby" + type: string + description: 'search on a specific records field. the nested field is written with period separated field name. eg, "data.schema.columns.name"' responses: 200: description: OK