forked from grafana/grafana
/
manager.go
573 lines (470 loc) · 16.3 KB
/
manager.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
package manager
import (
"bytes"
"context"
"crypto/rand"
"encoding/base64"
"errors"
"fmt"
"strconv"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/sync/errgroup"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/usagestats"
"github.com/grafana/grafana/pkg/services/encryption"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/kmsproviders"
"github.com/grafana/grafana/pkg/services/secrets"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util"
)
// keyIdDelimiter is the byte that opens and closes the base64-encoded data
// key id placed in front of every envelope-encrypted payload.
const keyIdDelimiter = '#'

// now is used for testing purposes,
// as a way to fake time.Now function.
var now = time.Now
// SecretsService encrypts and decrypts secrets. Depending on the
// featuremgmt.FlagDisableEnvelopeEncryption toggle it either uses the legacy
// encryption service directly with the configured secret key, or envelope
// encryption: payloads are encrypted with data keys, and the data keys
// themselves are encrypted by a provider before being persisted in the store.
type SecretsService struct {
	// Collaborators injected through ProvideSecretsService.
	store      secrets.Store       // persistence of (encrypted) data keys
	enc        encryption.Internal // performs the actual payload encryption/decryption
	cfg        *setting.Cfg
	features   featuremgmt.FeatureToggles
	usageStats usagestats.Service

	// mtx serializes the lookup/creation of the current data key and
	// data key rotation.
	mtx sync.Mutex
	// dataKeyCache keeps decrypted data keys in memory.
	dataKeyCache *dataKeyCache

	// pOnce guards the one-time population of providers in InitProviders.
	pOnce               sync.Once
	providers           map[secrets.ProviderID]secrets.Provider
	kmsProvidersService kmsproviders.Service

	// currentProviderID identifies the provider used to encrypt newly
	// created data keys.
	currentProviderID secrets.ProviderID

	log log.Logger
}
// ProvideSecretsService builds a SecretsService from its dependencies.
//
// It reads the data key cache TTL ("security.encryption" section, key
// "data_keys_cache_ttl", default 15m) and the current encryption provider
// ("security" section, key "encryption_provider") from cfg. Unless envelope
// encryption is disabled through featuremgmt.FlagDisableEnvelopeEncryption,
// the encryption providers are initialized and the current provider must be
// among them; otherwise an error is returned. Usage metrics are registered
// before the service is returned.
func ProvideSecretsService(
	store secrets.Store,
	kmsProvidersService kmsproviders.Service,
	enc encryption.Internal,
	cfg *setting.Cfg,
	features featuremgmt.FeatureToggles,
	usageStats usagestats.Service,
) (*SecretsService, error) {
	cacheTTL := cfg.SectionWithEnvOverrides("security.encryption").
		Key("data_keys_cache_ttl").
		MustDuration(15 * time.Minute)

	configuredProvider := cfg.SectionWithEnvOverrides("security").
		Key("encryption_provider").
		MustString(kmsproviders.Default)
	currentProviderID := kmsproviders.NormalizeProviderID(secrets.ProviderID(configuredProvider))

	s := &SecretsService{
		store:               store,
		enc:                 enc,
		cfg:                 cfg,
		usageStats:          usageStats,
		kmsProvidersService: kmsProvidersService,
		dataKeyCache:        newDataKeyCache(cacheTTL),
		currentProviderID:   currentProviderID,
		features:            features,
		log:                 log.New("secrets"),
	}

	enabled := !features.IsEnabled(featuremgmt.FlagDisableEnvelopeEncryption)

	if enabled {
		if err := s.InitProviders(); err != nil {
			return nil, err
		}

		// With envelope encryption on, the configured provider must exist.
		if _, configured := s.providers[currentProviderID]; !configured {
			return nil, fmt.Errorf("missing configuration for current encryption provider %s", currentProviderID)
		}
	} else if currentProviderID != kmsproviders.Default {
		// A non-default provider has no effect while the feature is off.
		s.log.Warn("Changing encryption provider requires enabling envelope encryption feature")
	}

	s.log.Info("Envelope encryption state", "enabled", enabled, "current provider", currentProviderID)

	s.registerUsageMetrics()

	return s, nil
}
// InitProviders populates the service's encryption providers by asking the
// KMS providers service for them. The work is guarded by a sync.Once, so it
// is performed at most once per SecretsService.
//
// NOTE(review): because the sync.Once body runs only on the first call, any
// later call returns a nil error even if the first attempt failed - confirm
// that callers tolerate this.
func (s *SecretsService) InitProviders() (err error) {
	load := func() {
		s.providers, err = s.kmsProvidersService.Provide()
	}
	s.pOnce.Do(load)

	return err
}
// registerUsageMetrics registers a usage stats callback that reports:
//   - whether envelope encryption is enabled (0 or 1),
//   - the kind of the current encryption provider,
//   - the number of initialized providers, grouped by kind.
//
// The callback fails if the kind of any provider id cannot be determined.
func (s *SecretsService) registerUsageMetrics() {
	collect := func(context.Context) (map[string]interface{}, error) {
		metrics := map[string]interface{}{}

		// Enabled / disabled
		envelopeEnabled := 0
		if !s.features.IsEnabled(featuremgmt.FlagDisableEnvelopeEncryption) {
			envelopeEnabled = 1
		}
		metrics["stats.encryption.envelope_encryption_enabled.count"] = envelopeEnabled

		// Current provider
		currentKind, err := s.currentProviderID.Kind()
		if err != nil {
			return nil, err
		}
		metrics[fmt.Sprintf("stats.encryption.current_provider.%s.count", currentKind)] = 1

		// Count by kind
		perKind := map[string]int{}
		for providerID := range s.providers {
			providerKind, kindErr := providerID.Kind()
			if kindErr != nil {
				return nil, kindErr
			}
			perKind[providerKind]++
		}

		for providerKind, total := range perKind {
			metrics[fmt.Sprintf(`stats.encryption.providers.%s.count`, providerKind)] = total
		}

		return metrics, nil
	}

	s.usageStats.RegisterMetricsFunc(collect)
}
// providersInitialized reports whether at least one encryption provider
// is currently registered on the service.
func (s *SecretsService) providersInitialized() bool {
	return len(s.providers) != 0
}
// encryptedWithEnvelopeEncryption reports whether payload starts with the
// key id delimiter, which is how envelope-encrypted payloads are prefixed.
// An empty payload is never considered envelope-encrypted.
func (s *SecretsService) encryptedWithEnvelopeEncryption(payload []byte) bool {
	if len(payload) == 0 {
		return false
	}
	return payload[0] == keyIdDelimiter
}
// b64 is the unpadded standard base64 encoding used to encode and decode
// the data key id embedded in the prefix of envelope-encrypted payloads.
var b64 = base64.RawStdEncoding
// Encrypt encrypts payload and returns the resulting blob.
//
// When featuremgmt.FlagDisableEnvelopeEncryption is on, the legacy encryption
// service is used directly with setting.SecretKey. Otherwise the payload is
// encrypted with the current data key for the scope given by opt, and the
// result is laid out as:
//
//	'#' + base64(data key id) + '#' + encrypted payload
//
// Every envelope encryption attempt is counted in opsCounter, labelled with
// its outcome.
func (s *SecretsService) Encrypt(ctx context.Context, payload []byte, opt secrets.EncryptionOptions) ([]byte, error) {
	// Legacy path: envelope encryption is switched off.
	if s.features.IsEnabled(featuremgmt.FlagDisableEnvelopeEncryption) {
		return s.enc.Encrypt(ctx, payload, setting.SecretKey)
	}

	// err is read by the deferred metrics update, so every failure below
	// must be assigned to it (never shadowed).
	var err error
	defer func() {
		labels := prometheus.Labels{
			"success":   strconv.FormatBool(err == nil),
			"operation": OpEncrypt,
		}
		opsCounter.With(labels).Inc()
	}()

	// Envelope path: resolve (or create) the data key for this scope.
	scope := opt()
	label := secrets.KeyLabel(scope, s.currentProviderID)

	var (
		keyID   string
		dataKey []byte
	)
	keyID, dataKey, err = s.currentDataKey(ctx, label, scope)
	if err != nil {
		s.log.Error("Failed to get current data key", "error", err, "label", label)
		return nil, err
	}

	var ciphertext []byte
	ciphertext, err = s.enc.Encrypt(ctx, payload, string(dataKey))
	if err != nil {
		s.log.Error("Failed to encrypt secret", "error", err)
		return nil, err
	}

	// Assemble delimiter, encoded key id, delimiter and ciphertext
	// directly into the final buffer.
	encodedIDLen := b64.EncodedLen(len(keyID))
	blob := make([]byte, encodedIDLen+2+len(ciphertext))
	blob[0] = keyIdDelimiter
	b64.Encode(blob[1:], []byte(keyID))
	blob[encodedIDLen+1] = keyIdDelimiter
	copy(blob[encodedIDLen+2:], ciphertext)

	return blob, nil
}
// currentDataKey returns the id and the decrypted value of the data key
// currently in use for label. It is looked up through dataKeyByLabel (cache
// first, then database); when none exists, a new random data key is created
// through newDataKey, which persists it encrypted in the database.
//
// The whole operation runs under the service mutex, so that only one request
// at a time can fetch or create the current data key; this avoids creating
// several data keys when none exists yet.
func (s *SecretsService) currentDataKey(ctx context.Context, label string, scope string) (string, []byte, error) {
	s.mtx.Lock()
	defer s.mtx.Unlock()

	// Try the existing data key first (cache or database).
	id, dataKey, err := s.dataKeyByLabel(ctx, label)
	if err != nil {
		return "", nil, err
	}
	if dataKey != nil {
		return id, dataKey, nil
	}

	// Nothing found: generate a brand new data key.
	id, dataKey, err = s.newDataKey(ctx, label, scope)
	if err != nil {
		return "", nil, err
	}

	return id, dataKey, nil
}
// dataKeyByLabel returns the id and decrypted value of the current data key
// for label. An active entry from the in-memory cache is preferred; otherwise
// the key is fetched from the database, decrypted by its provider and handed
// to cacheDataKey.
//
// When the database holds no current data key for label, it returns an empty
// id, a nil key and a nil error.
func (s *SecretsService) dataKeyByLabel(ctx context.Context, label string) (string, []byte, error) {
	// Fast path: an active entry cached by label.
	if cached, found := s.dataKeyCache.getByLabel(label); found && cached.active {
		return cached.id, cached.dataKey, nil
	}

	// Slow path: read the current data key from the database.
	stored, err := s.store.GetCurrentDataKey(ctx, label)
	if err != nil {
		if errors.Is(err, secrets.ErrDataKeyNotFound) {
			// Not an error for callers: there is simply no key yet.
			return "", nil, nil
		}
		return "", nil, err
	}

	// The key is stored encrypted; locate the provider that can decrypt it.
	providerID := kmsproviders.NormalizeProviderID(stored.Provider)
	provider, known := s.providers[providerID]
	if !known {
		return "", nil, fmt.Errorf("could not find encryption provider '%s'", stored.Provider)
	}

	plainKey, err := provider.Decrypt(ctx, stored.EncryptedData)
	if err != nil {
		return "", nil, err
	}

	// Remember the decrypted key for subsequent operations.
	s.cacheDataKey(stored, plainKey)

	return stored.Id, plainKey, nil
}
// newDataKey generates a new random data key, encrypts it with the current
// encryption provider and stores the encrypted value in the database as an
// active data key for the given label and scope.
//
// It returns the generated id and the plain (decrypted) data key. Note that
// the new key is not added to the in-memory cache here.
func (s *SecretsService) newDataKey(ctx context.Context, label string, scope string) (string, []byte, error) {
	// Generate the raw key material.
	plainKey, err := newRandomDataKey()
	if err != nil {
		return "", nil, err
	}

	// Encrypt it with the current provider.
	provider, known := s.providers[s.currentProviderID]
	if !known {
		return "", nil, fmt.Errorf("could not find encryption provider '%s'", s.currentProviderID)
	}

	encryptedKey, err := provider.Encrypt(ctx, plainKey)
	if err != nil {
		return "", nil, err
	}

	// Persist only the encrypted form.
	id := util.GenerateShortUID()
	record := &secrets.DataKey{
		Active:        true,
		Id:            id,
		Provider:      s.currentProviderID,
		EncryptedData: encryptedKey,
		Label:         label,
		Scope:         scope,
	}
	if err := s.store.CreateDataKey(ctx, record); err != nil {
		return "", nil, err
	}

	return id, plainKey, nil
}
// newRandomDataKey returns 16 bytes read from the cryptographically secure
// random source (crypto/rand), or the error reported while reading them.
func newRandomDataKey() ([]byte, error) {
	key := make([]byte, 16)
	if _, err := rand.Read(key); err != nil {
		return nil, err
	}
	return key, nil
}
// Decrypt decrypts payload and returns the plain secret.
//
// Payloads that do not start with the key id delimiter are treated as legacy
// ones and decrypted with the "secret_key" setting of the "security" section.
// Envelope-encrypted payloads have the layout
//
//	'#' + base64(data key id) + '#' + encrypted payload
//
// and are decrypted with the data key referenced by the embedded id.
//
// It returns an error when payload is empty, when the payload is
// envelope-encrypted but envelope encryption is disabled and no provider is
// initialized, when the key id is missing or not valid base64, when the data
// key cannot be resolved, or when the underlying decryption fails.
//
// Every attempt is counted in opsCounter, labelled with its outcome, and
// failures are logged.
func (s *SecretsService) Decrypt(ctx context.Context, payload []byte) ([]byte, error) {
	// err is read by the deferred function below, so every failure must be
	// assigned to it (never shadowed) before returning.
	var err error
	defer func() {
		opsCounter.With(prometheus.Labels{
			"success":   strconv.FormatBool(err == nil),
			"operation": OpDecrypt,
		}).Inc()

		if err != nil {
			s.log.Error("Failed to decrypt secret", "error", err)
		}
	}()

	if len(payload) == 0 {
		err = fmt.Errorf("unable to decrypt empty payload")
		return nil, err
	}

	// If encrypted with envelope encryption, the feature is disabled and
	// no provider is initialized, then we throw an error.
	if s.encryptedWithEnvelopeEncryption(payload) &&
		s.features.IsEnabled(featuremgmt.FlagDisableEnvelopeEncryption) &&
		!s.providersInitialized() {
		err = fmt.Errorf("failed to decrypt a secret encrypted with envelope encryption: envelope encryption is disabled")
		return nil, err
	}

	var dataKey []byte

	if !s.encryptedWithEnvelopeEncryption(payload) {
		// Legacy payload: the key is the configured secret key.
		secretKey := s.cfg.SectionWithEnvOverrides("security").Key("secret_key").Value()
		dataKey = []byte(secretKey)
	} else {
		// Drop the leading delimiter and split "<b64 key id>#<ciphertext>".
		payload = payload[1:]
		endOfKey := bytes.IndexByte(payload, keyIdDelimiter)
		if endOfKey == -1 {
			err = fmt.Errorf("could not find valid key id in encrypted payload")
			return nil, err
		}
		b64Key := payload[:endOfKey]
		payload = payload[endOfKey+1:]

		// DecodedLen is only an upper bound: the decoder skips '\r' and '\n',
		// so the buffer must be truncated to the number of bytes actually
		// written. Otherwise the key id would carry trailing zero bytes and
		// the data key lookup would fail.
		keyId := make([]byte, b64.DecodedLen(len(b64Key)))
		var n int
		n, err = b64.Decode(keyId, b64Key)
		if err != nil {
			return nil, err
		}
		keyId = keyId[:n]

		dataKey, err = s.dataKeyById(ctx, string(keyId))
		if err != nil {
			s.log.Error("Failed to lookup data key by id", "id", string(keyId), "error", err)
			return nil, err
		}
	}

	var decrypted []byte
	decrypted, err = s.enc.Decrypt(ctx, payload, string(dataKey))

	return decrypted, err
}
// EncryptJsonData encrypts every value of kv with Encrypt, using opt for all
// of them, and returns the encrypted values under their original keys.
// It stops at the first encryption failure and returns that error.
func (s *SecretsService) EncryptJsonData(ctx context.Context, kv map[string]string, opt secrets.EncryptionOptions) (map[string][]byte, error) {
	result := map[string][]byte{}

	for name, plain := range kv {
		ciphertext, err := s.Encrypt(ctx, []byte(plain), opt)
		if err != nil {
			return nil, err
		}
		result[name] = ciphertext
	}

	return result, nil
}
// DecryptJsonData decrypts every value of sjd with Decrypt and returns the
// plain values, as strings, under their original keys.
// It stops at the first decryption failure and returns that error.
func (s *SecretsService) DecryptJsonData(ctx context.Context, sjd map[string][]byte) (map[string]string, error) {
	result := map[string]string{}

	for name, ciphertext := range sjd {
		plain, err := s.Decrypt(ctx, ciphertext)
		if err != nil {
			return nil, err
		}
		result[name] = string(plain)
	}

	return result, nil
}
// GetDecryptedValue returns the decrypted value stored in sjd under key.
// fallback is returned when key is absent from sjd or when its value
// cannot be decrypted; the decryption error itself is not reported.
func (s *SecretsService) GetDecryptedValue(ctx context.Context, sjd map[string][]byte, key, fallback string) string {
	ciphertext, present := sjd[key]
	if !present {
		return fallback
	}

	plain, err := s.Decrypt(ctx, ciphertext)
	if err != nil {
		return fallback
	}

	return string(plain)
}
// dataKeyById returns the decrypted data key identified by id.
// A cached entry is returned as-is; otherwise the encrypted data key is read
// from the database, decrypted by the provider it was encrypted with, handed
// to cacheDataKey and returned.
func (s *SecretsService) dataKeyById(ctx context.Context, id string) ([]byte, error) {
	// Fast path: decrypted key already cached by id.
	if cached, found := s.dataKeyCache.getById(id); found {
		return cached.dataKey, nil
	}

	// Slow path: read the encrypted data key from the database.
	stored, err := s.store.GetDataKey(ctx, id)
	if err != nil {
		return nil, err
	}

	// Locate the provider the data key was encrypted with.
	providerID := kmsproviders.NormalizeProviderID(stored.Provider)
	provider, known := s.providers[providerID]
	if !known {
		return nil, fmt.Errorf("could not find encryption provider '%s'", stored.Provider)
	}

	// Decrypt the data key.
	plainKey, err := provider.Decrypt(ctx, stored.EncryptedData)
	if err != nil {
		return nil, err
	}

	// Remember the decrypted key for subsequent operations.
	s.cacheDataKey(stored, plainKey)

	return plainKey, nil
}
// GetProviders returns the service's encryption providers, keyed by
// provider id. The internal map itself is returned (not a copy), so it is
// shared with the service. Within this file the map is only populated by
// InitProviders, so it is nil until that has run.
func (s *SecretsService) GetProviders() map[secrets.ProviderID]secrets.Provider {
return s.providers
}
// RotateDataKeys disables the data keys held by the store and flushes the
// in-memory data key cache. It runs under the service mutex, the same lock
// currentDataKey takes. If disabling the keys fails, the error is logged and
// returned, and the cache is left untouched.
func (s *SecretsService) RotateDataKeys(ctx context.Context) error {
	s.log.Info("Data keys rotation triggered, acquiring lock...")

	s.mtx.Lock()
	defer s.mtx.Unlock()

	s.log.Info("Data keys rotation started")

	if err := s.store.DisableDataKeys(ctx); err != nil {
		s.log.Error("Data keys rotation failed", "error", err)
		return err
	}

	s.dataKeyCache.flush()
	s.log.Info("Data keys rotation finished successfully")

	return nil
}
// ReEncryptDataKeys asks the store to re-encrypt the data keys, passing it
// the service's providers and the current provider id, and then flushes the
// in-memory data key cache.
//
// When envelope encryption is disabled, provider initialization is attempted
// first, since it was skipped when the service was built.
func (s *SecretsService) ReEncryptDataKeys(ctx context.Context) error {
	s.log.Info("Data keys re-encryption triggered")

	envelopeDisabled := s.features.IsEnabled(featuremgmt.FlagDisableEnvelopeEncryption)
	if envelopeDisabled {
		s.log.Info("Envelope encryption is not enabled but trying to init providers anyway...")

		initErr := s.InitProviders()
		if initErr != nil {
			s.log.Error("Envelope encryption providers initialization failed", "error", initErr)
			return initErr
		}
	}

	reEncryptErr := s.store.ReEncryptDataKeys(ctx, s.providers, s.currentProviderID)
	if reEncryptErr != nil {
		s.log.Error("Data keys re-encryption failed", "error", reEncryptErr)
		return reEncryptErr
	}

	s.dataKeyCache.flush()
	s.log.Info("Data keys re-encryption finished successfully")

	return nil
}
// Run is the service's background loop. It starts every provider that
// implements secrets.BackgroundProvider in an errgroup, and periodically
// removes expired entries from the data key cache. The cleanup interval is
// read from the "security.encryption" section, key
// "data_keys_cache_cleanup_interval", defaulting to one minute.
//
// The loop ends when the errgroup context is done. Run then stops the ticker,
// waits for the background providers and returns their error, except that
// context.Canceled is reported as nil.
func (s *SecretsService) Run(ctx context.Context) error {
	cleanupInterval := s.cfg.SectionWithEnvOverrides("security.encryption").
		Key("data_keys_cache_cleanup_interval").
		MustDuration(time.Minute)
	gc := time.NewTicker(cleanupInterval)

	grp, gCtx := errgroup.WithContext(ctx)

	for _, provider := range s.providers {
		// background is declared inside the loop body, so each goroutine
		// captures its own variable.
		background, ok := provider.(secrets.BackgroundProvider)
		if !ok {
			continue
		}
		grp.Go(func() error {
			return background.Run(gCtx)
		})
	}

	for {
		select {
		case <-gCtx.Done():
			s.log.Debug("Grafana is shutting down; stopping...")
			gc.Stop()

			err := grp.Wait()
			if err == nil || errors.Is(err, context.Canceled) {
				return nil
			}
			return err

		case <-gc.C:
			s.log.Debug("Removing expired data keys from cache...")
			s.dataKeyCache.removeExpired()
			s.log.Debug("Removing expired data keys from cache finished successfully")
		}
	}
}
// cacheDataKey stores a decrypted data key in the in-memory cache.
//
// Caching has to be done carefully: at SecretsService level there is no
// guarantee that a recently created data key has really been persisted. Its
// creation may have happened within a database transaction that can still
// fail, depending on the use case and the database engine involved.
//
// If such a non-persisted data key were cached and then reused by another
// encryption operation (outside of the transaction that created it), data
// would end up encrypted with a key that does not exist in the database,
// i.e. (unrecoverable) data corruption.
//
// For that reason the key is always cached by id, but cached by label only
// once it is older than a "caution period", after which it is assumed to
// have been persisted.
//
// See the issue below for more context:
// https://github.com/grafana/grafana-enterprise/issues/4252
func (s *SecretsService) cacheDataKey(dataKey *secrets.DataKey, decrypted []byte) {
	// A caution period of 10m is considered long enough for any database
	// transaction that created a data key to have finished. A key read from
	// the database more than 10m after its creation is therefore assumed to
	// be persisted, i.e. the transaction that created it is no longer running.
	const cautionPeriod = 10 * time.Minute

	entry := &dataKeyCacheEntry{
		id:      dataKey.Id,
		label:   dataKey.Label,
		dataKey: decrypted,
		active:  dataKey.Active,
	}

	// The cache "by id" only serves decrypt operations, so adding the
	// entry there cannot lead to corrupted data.
	s.dataKeyCache.addById(entry)

	// The cache "by label" only serves encrypt operations: skip it while
	// the data key is still within the caution period.
	persistedBefore := now().Add(-cautionPeriod)
	if !dataKey.Created.Before(persistedBefore) {
		return
	}

	s.dataKeyCache.addByLabel(entry)
}