forked from juju/charmstore
-
Notifications
You must be signed in to change notification settings - Fork 0
/
search.go
353 lines (322 loc) · 10.1 KB
/
search.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
// Copyright 2014 Canonical Ltd.
// Licensed under the LGPLv3, see LICENCE file for details.
package charmstore
import (
"crypto/sha1"
"encoding/base64"
"strings"
"time"
"gopkg.in/errgo.v1"
"gopkg.in/juju/charm.v4"
"github.com/juju/charmstore/internal/elasticsearch"
"github.com/juju/charmstore/internal/mongodoc"
)
// StoreElasticSearch provides strongly typed methods for accessing the
// elasticsearch database. These methods will not return errors if
// elasticsearch is not configured, allowing them to be safely called even if
// it is not enabled in this service.
type StoreElasticSearch struct {
// Index is the embedded elasticsearch index. The put and search methods
// treat a nil Index (or a nil *StoreElasticSearch) as "not configured".
*elasticsearch.Index
}
// typeName is the elasticsearch document type name that put and search
// pass to PutDocumentVersion and Search for entity documents.
const typeName = "entity"
// put stores the given entity in elasticsearch under the ID computed by
// getID, using the entity's URL revision as the document version. When no
// elasticsearch index is configured it does nothing and returns nil.
func (ses *StoreElasticSearch) put(entity *mongodoc.Entity) error {
	configured := ses != nil && ses.Index != nil
	if !configured {
		return nil
	}
	id := ses.getID(entity)
	version := int64(entity.URL.Revision)
	_, err := ses.PutDocumentVersion(typeName, id, version, entity)
	return err
}
// getID derives the elasticsearch document ID for entity. The ID is the
// URL-safe base64 encoding, with trailing padding removed, of the SHA-1 hash
// of the entity URL with its revision set to -1. Because the revision does
// not take part in the ID, an updated version of the same charm or bundle
// replaces the existing elasticsearch document.
func (ses *StoreElasticSearch) getID(entity *mongodoc.Entity) string {
	// Work on a copy so the entity's own URL keeps its revision.
	unrevisioned := *entity.URL
	unrevisioned.Revision = -1
	sum := sha1.Sum([]byte(unrevisioned.String()))
	// Trailing "=" padding is dropped: it is not needed and would get URL escaped.
	return strings.TrimRight(base64.URLEncoding.EncodeToString(sum[:]), "=")
}
// search runs the query described by sp against the configured
// elasticsearch index and returns the references parsed from the URL field
// of each hit. If there is no elasticsearch index configured it returns an
// empty SearchResult and a nil error, as if no results were found. If any
// hit carries a URL that cannot be parsed, the whole search fails.
func (ses *StoreElasticSearch) search(sp SearchParams) (SearchResult, error) {
	var none SearchResult
	if ses == nil || ses.Index == nil {
		return none, nil
	}

	dsl := createSearchDSL(sp)
	// The URL field is requested because it is what the results are built from.
	dsl.Fields = append(dsl.Fields, "URL")
	resp, err := ses.Search(typeName, dsl)
	if err != nil {
		return none, errgo.Mask(err)
	}

	hits := resp.Hits.Hits
	refs := make([]*charm.Reference, 0, len(hits))
	for _, hit := range hits {
		rawURL := hit.Fields.GetString("URL")
		ref, err := charm.ParseReference(rawURL)
		if err != nil {
			return none, errgo.Notef(err, "invalid result %q", rawURL)
		}
		refs = append(refs, ref)
	}

	return SearchResult{
		// Took is multiplied by time.Millisecond to obtain a Duration.
		SearchTime: time.Duration(resp.Took) * time.Millisecond,
		Total:      resp.Hits.Total,
		Results:    refs,
	}, nil
}
// ExportToElasticSearch iterates over every entity returned by
// store.DB.Entities() and writes each one to elasticsearch. It stops at the
// first entity that cannot be indexed and returns an error naming that
// entity's URL; otherwise it returns any error reported when the iterator
// is closed.
func (store *Store) ExportToElasticSearch() error {
	entities := store.DB.Entities().Find(nil).Iter()
	// Make sure the iterator is also closed on the early-return path.
	defer entities.Close()

	var entity mongodoc.Entity
	for entities.Next(&entity) {
		err := store.ES.put(&entity)
		if err != nil {
			return errgo.Notef(err, "cannot index %s", entity.URL)
		}
	}

	// Closing explicitly surfaces any error hit during iteration.
	closeErr := entities.Close()
	if closeErr != nil {
		return closeErr
	}
	return nil
}
// SearchParams represents the search parameters used to search the store.
type SearchParams struct {
// Text is the text to use in the full text search query. When it is
// empty the generated query matches all documents.
Text string
// AutoComplete, if specified, makes the search return only charms and
// bundles with a name that has Text as a prefix.
AutoComplete bool
// Filters limits the search to items with attributes that match the
// specified filter values, keyed by filter name.
Filters map[string][]string
// Limit is the maximum number of items to return.
Limit int
// Include lists the metadata items to include in the search results.
Include []string
// Skip is the offset at which the returned items start.
Skip int
}
// SearchResult represents the result of performing a search.
type SearchResult struct {
// SearchTime is the time elasticsearch reported taking for the search.
SearchTime time.Duration
// Total is the total number of hits reported by elasticsearch.
Total int
// Results holds one reference per returned hit, parsed from the hit's
// URL field.
Results []*charm.Reference
}
// Search searches the store for entities matching the given SearchParams.
// It returns a SearchResult describing the matches. On failure the
// underlying error is masked and returned together with an empty
// SearchResult.
func (store *Store) Search(sp SearchParams) (SearchResult, error) {
	found, err := store.ES.search(sp)
	if err == nil {
		return found, nil
	}
	return SearchResult{}, errgo.Mask(err)
}
// queryFields returns the document fields to search, mapped to the weight
// each field carries in the elasticsearch query. The name field has the
// highest weight; which variant of it is searched depends on
// sp.AutoComplete.
func queryFields(sp SearchParams) map[string]float64 {
	// Autocomplete searches use the ".ngrams" variant of the name field
	// (presumably an ngram-analysed sub-field; confirm against the index
	// mapping). Normal searches use the plain name field.
	nameField := "CharmMeta.Name"
	if sp.AutoComplete {
		nameField = "CharmMeta.Name.ngrams"
	}
	return map[string]float64{
		nameField:                 10,
		"URL.ngrams":              8,
		"CharmMeta.Categories":    5,
		"BundleData.Tags":         5,
		"CharmProvidedInterfaces": 3,
		"CharmRequiredInterfaces": 3,
		"CharmMeta.Description":   1,
		"BundleReadMe":            1,
	}
}
// encodeFields converts a map of field name to weight into the slice of
// weighted field strings used by a MultiMatchQuery. Each entry is produced
// by elasticsearch.BoostField. The order of the result follows map
// iteration order and is therefore unspecified.
func encodeFields(fields map[string]float64) []string {
	encoded := make([]string, len(fields))
	i := 0
	for name, weight := range fields {
		encoded[i] = elasticsearch.BoostField(name, weight)
		i++
	}
	return encoded
}
// createSearchDSL builds an elasticsearch query from the search parameters.
// The full text query (or a match-all query when sp.Text is empty) is
// wrapped in a function score query and then restricted by the filters
// derived from sp.Filters. sp.Skip and sp.Limit become the From and Size of
// the returned QueryDSL.
// http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html
func createSearchDSL(sp SearchParams) elasticsearch.QueryDSL {
	// Full text search: match everything unless some text was supplied.
	var text elasticsearch.Query = elasticsearch.MatchAllQuery{}
	if sp.Text != "" {
		text = elasticsearch.MultiMatchQuery{
			Query:  sp.Text,
			Fields: encodeFields(queryFields(sp)),
		}
	}

	// Scoring functions: a linear decay on UploadTime with a 365 day scale,
	// and a 1.25 boost for documents matching ownerFilter("") (URLs that
	// have no "~owner" part).
	scoring := []elasticsearch.Function{
		elasticsearch.DecayFunction{
			Function: "linear",
			Field:    "UploadTime",
			Scale:    "365d",
		},
		elasticsearch.BoostFactorFunction{
			Filter:      ownerFilter(""),
			BoostFactor: 1.25,
		},
	}
	scored := elasticsearch.FunctionScoreQuery{
		Query:     text,
		Functions: scoring,
	}

	// Filters are applied around the scored query.
	return elasticsearch.QueryDSL{
		From: sp.Skip,
		Size: sp.Limit,
		Query: elasticsearch.FilteredQuery{
			Query:  scored,
			Filter: createFilters(sp.Filters),
		},
	}
}
// createFilters converts the filters requested with the search API into
// filters in the elasticsearch query DSL. Please see http://tinyurl.com/qzobc69
// for details of how filters are specified in the API. For each key in f an
// OrFilter is created holding one filter per value specified for that key,
// and all of these are collected into a single AndFilter, so the result only
// matches when at least one of the requested values matches for every
// requested key. Any key that is not defined in the filters map is silently
// skipped.
func createFilters(f map[string][]string) elasticsearch.Filter {
	all := make(elasticsearch.AndFilter, 0, len(f))
	for name, values := range f {
		makeFilter, known := filters[name]
		if known {
			anyOf := make(elasticsearch.OrFilter, len(values))
			for i, v := range values {
				anyOf[i] = makeFilter(v)
			}
			all = append(all, anyOf)
		}
	}
	return all
}
// filters contains a mapping from a filter parameter in the API to a
// function that will generate an elasticsearch query DSL filter for the
// given value. createFilters looks keys up in this map and skips any
// filter name that is not present here.
var filters = map[string]func(string) elasticsearch.Filter{
"description": descriptionFilter,
"name": nameFilter,
"owner": ownerFilter,
"provides": termFilter("CharmProvidedInterfaces"),
"requires": termFilter("CharmRequiredInterfaces"),
"series": seriesFilter,
"summary": summaryFilter,
"tags": tagsFilter,
"type": typeFilter,
}
// descriptionFilter generates a filter that runs a "phrase" match query for
// value against the CharmMeta.Description field.
func descriptionFilter(value string) elasticsearch.Filter {
	phrase := elasticsearch.MatchQuery{
		Field: "CharmMeta.Description",
		Query: value,
		Type:  "phrase",
	}
	return elasticsearch.QueryFilter{Query: phrase}
}
// nameFilter generates a filter that matches on the name of the charm or
// bundle, as found in the URL field. The regular expression accepts an
// optional "~owner/" part and any series, followed by the regexp-escaped
// name and a numeric revision suffix.
func nameFilter(value string) elasticsearch.Filter {
	// TODO(mhilton) implement wildcards as in http://tinyurl.com/k46xexe
	pattern := `cs:(\~[^/]*/)?[^/]*/` + elasticsearch.EscapeRegexp(value) + "-[0-9]+"
	return elasticsearch.RegexpFilter{
		Field:  "URL",
		Regexp: pattern,
	}
}
// ownerFilter generates a filter that matches on the owner taken from the
// URL field. An empty value matches URLs in which "cs:" is not followed by
// "~", i.e. URLs with no owner part; any other value matches URLs that
// begin with "cs:~" followed by the regexp-escaped value and a "/".
func ownerFilter(value string) elasticsearch.Filter {
	pattern := `cs:[^\~].*`
	if value != "" {
		pattern = `cs:\~` + elasticsearch.EscapeRegexp(value) + "/.*"
	}
	return elasticsearch.RegexpFilter{
		Field:  "URL",
		Regexp: pattern,
	}
}
// seriesFilter generates a filter that matches on the series taken from the
// URL field. The regular expression accepts an optional "~owner/" part,
// then the regexp-escaped series, a "/", and any name ending in a numeric
// revision suffix.
func seriesFilter(value string) elasticsearch.Filter {
	pattern := `cs:(\~[^/]*/)?` + elasticsearch.EscapeRegexp(value) + "/.*-[0-9]+"
	return elasticsearch.RegexpFilter{
		Field:  "URL",
		Regexp: pattern,
	}
}
// summaryFilter generates a filter that runs a "phrase" match query for
// value against the CharmMeta.Summary field.
func summaryFilter(value string) elasticsearch.Filter {
	phrase := elasticsearch.MatchQuery{
		Field: "CharmMeta.Summary",
		Query: value,
		Type:  "phrase",
	}
	return elasticsearch.QueryFilter{Query: phrase}
}
// tagsFilter generates a filter that matches on the "tags" of an entity.
// For charms this is the CharmMeta.Categories field and for bundles it is
// the BundleData.Tags field. value is split on single spaces and empty
// pieces are ignored; the resulting filter requires every remaining tag to
// be present in one of the two fields.
func tagsFilter(value string) elasticsearch.Filter {
	words := strings.Split(value, " ")
	required := make(elasticsearch.AndFilter, 0, len(words))
	for _, tag := range words {
		if tag != "" {
			eitherField := elasticsearch.OrFilter{
				elasticsearch.TermFilter{
					Field: "CharmMeta.Categories",
					Value: tag,
				},
				elasticsearch.TermFilter{
					Field: "BundleData.Tags",
					Value: tag,
				},
			}
			required = append(required, eitherField)
		}
	}
	return required
}
// termFilter creates a function that generates a filter on the specified
// document field. The generated function splits its value on single spaces,
// ignores empty pieces, and returns an AndFilter holding one TermFilter on
// field per remaining term.
func termFilter(field string) func(string) elasticsearch.Filter {
	return func(value string) elasticsearch.Filter {
		words := strings.Split(value, " ")
		required := make(elasticsearch.AndFilter, 0, len(words))
		for _, term := range words {
			if term != "" {
				required = append(required, elasticsearch.TermFilter{
					Field: field,
					Value: term,
				})
			}
		}
		return required
	}
}
// bundleFilter is a filter that matches against bundles, based on
// the URL. It is the series filter for the series "bundle" and is
// used by typeFilter.
var bundleFilter = seriesFilter("bundle")
// typeFilter generates a filter that is used to match either only bundles
// or only charms. The value "bundle" selects bundles; every other value
// selects everything that is not a bundle.
func typeFilter(value string) elasticsearch.Filter {
	switch value {
	case "bundle":
		return bundleFilter
	default:
		return elasticsearch.NotFilter{bundleFilter}
	}
}