From 104895bad8d73cbc95f54aaaa5401b489243831b Mon Sep 17 00:00:00 2001 From: Adil Ansari Date: Thu, 22 Jun 2023 11:13:51 -0700 Subject: [PATCH] feat: Adding custom token separators to search index (#1280) * feat: Adding custom token separators to search index * rejecting update on token separators --- schema/search.go | 48 ++++++++++++++++++++++++++++++--------- server/metadata/tenant.go | 9 +++++--- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/schema/search.go b/schema/search.go index 848541db..569a26e7 100644 --- a/schema/search.go +++ b/schema/search.go @@ -61,10 +61,22 @@ type SearchSource struct { } type SearchJSONSchema struct { - Name string `json:"title,omitempty"` - Description string `json:"description,omitempty"` - Properties jsoniter.RawMessage `json:"properties,omitempty"` - Source *SearchSource `json:"source,omitempty"` + Name string `json:"title,omitempty"` + Description string `json:"description,omitempty"` + Properties jsoniter.RawMessage `json:"properties,omitempty"` + Source *SearchSource `json:"source,omitempty"` + Options *SearchSchemaOptions `json:"options,omitempty"` +} + +type SearchSchemaOptions struct { + TokenSeparators *[]string `json:"token_separators,omitempty"` +} + +func (s *SearchSchemaOptions) GetTokenSeparators() []string { + if s.TokenSeparators == nil { + return make([]string, 0) + } + return *s.TokenSeparators } // SearchFactory is used as an intermediate step so that collection can be initialized with properly encoded values. @@ -74,9 +86,10 @@ type SearchFactory struct { // Fields are derived from the user schema. 
Fields []*Field // Schema is the raw JSON schema received - Schema jsoniter.RawMessage - Sub string - Source SearchSource + Schema jsoniter.RawMessage + Sub string + Source SearchSource + Options SearchSchemaOptions } func (fb *FactoryBuilder) BuildSearch(index string, reqSchema jsoniter.RawMessage) (*SearchFactory, error) { @@ -120,12 +133,17 @@ func (fb *FactoryBuilder) BuildSearch(index string, reqSchema jsoniter.RawMessag return nil, err } } + var schemaOptions SearchSchemaOptions + if schema.Options != nil { + schemaOptions = *schema.Options + } factory := &SearchFactory{ - Name: index, - Fields: fields, - Schema: searchSchema, - Source: source, + Name: index, + Fields: fields, + Schema: searchSchema, + Source: source, + Options: schemaOptions, } idFound := false @@ -181,6 +199,9 @@ type SearchIndex struct { // will be one to one mapped to queryable field but complex fields like object type field there may be more than // one queryableFields. As queryableFields represent a flattened state these can be used as-is to index in memory. QueryableFields []*QueryableField + // TokenSeparators is a list of symbols or special characters to be used for splitting the text into individual + // words in addition to space and new-line characters. 
+ TokenSeparators []string // Source of this index Source SearchSource SearchIDField *QueryableField @@ -198,12 +219,14 @@ func NewSearchIndex(ver uint32, searchStoreName string, factory *SearchFactory, searchIdField = q } } + index := &SearchIndex{ Version: ver, Name: factory.Name, Fields: factory.Fields, Schema: factory.Schema, Source: factory.Source, + TokenSeparators: factory.Options.GetTokenSeparators(), SearchIDField: searchIdField, QueryableFields: queryableFields, int64FieldsPath: buildInt64Path(factory.Fields), @@ -330,6 +353,9 @@ func (s *SearchIndex) buildSearchSchema(name string) { Name: name, Fields: tsFields, } + if len(s.TokenSeparators) > 0 { + s.StoreSchema.TokenSeparators = &s.TokenSeparators + } } func (s *SearchIndex) GetSearchDeltaFields(existingFields []*QueryableField, fieldsInSearch []tsApi.Field) []tsApi.Field { diff --git a/server/metadata/tenant.go b/server/metadata/tenant.go index 1d27a111..3023b4cd 100644 --- a/server/metadata/tenant.go +++ b/server/metadata/tenant.go @@ -862,13 +862,16 @@ func (tenant *Tenant) CreateSearchIndex(ctx context.Context, tx transaction.Tx, func (tenant *Tenant) createSearchIndex(ctx context.Context, tx transaction.Tx, project *Project, factory *schema.SearchFactory) error { if index, ok := project.search.GetIndex(factory.Name); ok { - if eq, err := isSchemaEq(index.Schema, factory.Schema); eq || err != nil { - // shortcut to just check if schema is eq then return early + if tokensEq := reflect.DeepEqual(factory.Options.GetTokenSeparators(), index.TokenSeparators); !tokensEq { + return errors.InvalidArgument("`token_separators` cannot be modified, please create a new search index") + } + // shortcut to just check if there aren't any changes then return early + if eq, err := isSchemaEq(index.Schema, factory.Schema); eq || err != nil { tx.Context().MarkNoMetadataStateChanged() - return err } + // Token separators can only be set when the index is created; a change to them has already been rejected with an error above return
tenant.updateSearchIndex(ctx, tx, project, factory, index) }