Modify document for elasticsearch migration.

kubernetes-sigs · Aug 16, 2019 · df779fd · df779fd
1 parent e0d388c
commit df779fd
Show file tree

Hide file tree

Showing 4 changed files with 110 additions and 226 deletions.
diff --git a/internal/search/doc/doc.go b/internal/search/doc/doc.go
@@ -6,117 +6,53 @@ import (
 	"time"
 
 	"sigs.k8s.io/yaml"
-
-	"google.golang.org/appengine/search"
-)
-
-const (
-	identifierStr   = "identifier"
-	documentStr     = "document"
-	repoURLStr      = "repo_url"
-	filePathStr     = "file_path"
-	creationTimeStr = "creation_time"
 )
 
-// Represents an unbreakable character stream.
-type Atom = search.Atom
-
-// Implements search.FieldLoadSaver in order to index this representation of a kustomization.yaml
-// file.
+// This document is meant to be used as the elasticsearch document type.
+// Fields are serialized as-is to elasticsearch, where indices are built
+// to facilitate text search queries. Identifiers, Values, FilePath,
+// RepositoryURL and DocumentData are meant to be searched for text queries
+// directly, while the other fields can either be used as a filter, or as
+// additional metadata displayed in the UI.
+//
+// The fields of the document and their purpose are listed below:
+// - DocumentData contains the contents of the kustomization file.
+// - Kinds represents the Kubernetes Kinds that are in this file.
+// - Identifiers are a list of (partial and full) identifier paths that can be
+//   found by users. Each part of a path is delimited by ":" e.g. spec:replicas.
+// - Values are a list of identifier paths and their values that can be found by
+//   search queries. The path is delimited by ":" and the value follows the "="
+//   symbol e.g. spec:replicas=4.
+// - FilePath is the path of the file.
+// - RepositoryURL is the URL of the source repository.
+// - CreationTime is the time at which the file was created.
+//
+// Representing each Identifier and Value as a flat string representation
+// facilitates the use of complex text search features from elasticsearch such
+// as fuzzy searching, regex, wildcards, etc.
 type KustomizationDocument struct {
-	identifiers   []Atom
-	FilePath      Atom
-	RepositoryURL Atom
-	DocumentData  string
-	CreationTime  time.Time
+	DocumentData  string    `json:"document,omitempty"`
+	Kinds         []string  `json:"kinds,omitempty"`
+	Identifiers   []string  `json:"identifiers,omitempty"`
+	Values        []string  `json:"values,omitempty"`
+	FilePath      string    `json:"filePath,omitempty"`
+	RepositoryURL string    `json:"repositoryUrl,omitempty"`
+	CreationTime  time.Time `json:"creationTime,omitempty"`
 }
 
-// Partially implements search.FieldLoadSaver.
-func (k *KustomizationDocument) Load(fields []search.Field, metadata *search.DocumentMetadata) error {
-	k.identifiers = make([]search.Atom, 0)
-	wrongTypeError := func(name string, expected interface{}, actual interface{}) error {
-		return fmt.Errorf("%s expects type %T, found %#v", name, expected, actual)
-	}
-
-	for _, f := range fields {
-		switch f.Name {
-		case identifierStr:
-			identifier, ok := f.Value.(search.Atom)
-			if !ok {
-				return wrongTypeError(f.Name, identifier, f.Value)
-			}
-			k.identifiers = append(k.identifiers, identifier)
-
-		case documentStr:
-			document, ok := f.Value.(string)
-			if !ok {
-				return wrongTypeError(f.Name, document, f.Value)
-			}
-			k.DocumentData = document
-
-		case filePathStr:
-			fp, ok := f.Value.(search.Atom)
-			if !ok {
-				return wrongTypeError(f.Name, fp, f.Value)
-			}
-			k.FilePath = fp
-
-		case repoURLStr:
-			url, ok := f.Value.(search.Atom)
-			if !ok {
-				return wrongTypeError(f.Name, url, f.Value)
-			}
-			k.RepositoryURL = url
-
-		case creationTimeStr:
-			time, ok := f.Value.(time.Time)
-			if !ok {
-				return wrongTypeError(f.Name, time, f.Value)
-			}
-			k.CreationTime = time
-		default:
-			return fmt.Errorf("KustomizationDocument field %s not recognized", f.Name)
-		}
-	}
-
-	return nil
-}
-
-// Partially implements search.FieldLoadSaver.
-func (k *KustomizationDocument) Save() ([]search.Field, *search.DocumentMetadata, error) {
-	err := k.ParseYAML()
-	if err != nil {
-		return nil, nil, err
-	}
-
-	extraFields := []search.Field{
-		{Name: documentStr, Value: k.DocumentData},
-		{Name: filePathStr, Value: k.FilePath},
-		{Name: repoURLStr, Value: k.RepositoryURL},
-		{Name: creationTimeStr, Value: k.CreationTime},
-	}
-
-	fields := make([]search.Field, 0, len(k.identifiers)+len(extraFields))
-	for _, identifier := range k.identifiers {
-		fields = append(fields, search.Field{Name: identifierStr, Value: identifier})
-	}
-	fields = append(fields, extraFields...)
-
-	return fields, nil, nil
-}
-
-func (k *KustomizationDocument) ParseYAML() error {
-	k.identifiers = make([]Atom, 0)
+func (doc *KustomizationDocument) ParseYAML() error {
+	doc.Identifiers = make([]string, 0)
+	doc.Values = make([]string, 0)
 
 	var kustomization map[string]interface{}
-	err := yaml.Unmarshal([]byte(k.DocumentData), &kustomization)
+	err := yaml.Unmarshal([]byte(doc.DocumentData), &kustomization)
 	if err != nil {
 		return fmt.Errorf("unable to parse kustomization file: %s", err)
 	}
 
 	type Map struct {
 		data   map[string]interface{}
-		prefix Atom
+		prefix string
 	}
 
 	toVisit := []Map{
@@ -126,43 +62,53 @@ func (k *KustomizationDocument) ParseYAML() error {
 		},
 	}
 
-	atomJoin := func(vals ...interface{}) Atom {
-		strs := make([]string, 0, len(vals))
-		for _, val := range vals {
-			strs = append(strs, fmt.Sprint(val))
-		}
-		return Atom(strings.Trim(strings.Join(strs, " "), " "))
-	}
-
-	set := make(map[Atom]struct{})
-
+	identifierSet := make(map[string]struct{})
+	valueSet := make(map[string]struct{})
 	for i := 0; i < len(toVisit); i++ {
 		visiting := toVisit[i]
 		for k, v := range visiting.data {
-			set[atomJoin(visiting.prefix, k)] = struct{}{}
-			switch value := v.(type) {
-			case map[string]interface{}:
-				toVisit = append(toVisit, Map{
-					data:   value,
-					prefix: atomJoin(visiting.prefix, fmt.Sprint(k)),
-				})
-			case []interface{}:
-				for _, val := range value {
-					submap, ok := val.(map[string]interface{})
-					if !ok {
-						continue
-					}
+			identifier := fmt.Sprintf("%s:%s", visiting.prefix,
+				strings.Replace(k, ":", "%3A", -1))
+			// noop after the first iteration.
+			identifier = strings.TrimLeft(identifier, ":")
+
+			// Recursive function traverses structure to find
+			// identifiers and values. These later get formatted
+			// into doc.Identifiers and doc.Values respectively.
+			var traverseStructure func(interface{})
+			traverseStructure = func(arg interface{}) {
+				switch value := arg.(type) {
+				case map[string]interface{}:
 					toVisit = append(toVisit, Map{
-						data:   submap,
-						prefix: atomJoin(visiting.prefix, fmt.Sprint(k)),
+						data:   value,
+						prefix: identifier,
 					})
+				case []interface{}:
+					for _, val := range value {
+						traverseStructure(val)
+					}
+				case interface{}:
+					esc := strings.Replace(fmt.Sprintf("%v",
+						value), ":", "%3A", -1)
+
+					valuePath := fmt.Sprintf("%s=%v",
+						identifier, esc)
+					valueSet[valuePath] = struct{}{}
 				}
 			}
+			traverseStructure(v)
+
+			identifierSet[identifier] = struct{}{}
+
 		}
 	}
 
-	for key := range set {
-		k.identifiers = append(k.identifiers, key)
+	for val := range valueSet {
+		doc.Values = append(doc.Values, val)
+	}
+
+	for key := range identifierSet {
+		doc.Identifiers = append(doc.Identifiers, key)
 	}
 
 	return nil