-
Notifications
You must be signed in to change notification settings - Fork 444
/
secret.go
250 lines (217 loc) · 8.97 KB
/
secret.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
package kube
import (
"context"
"crypto/x509"
"encoding/pem"
"time"
errors "github.com/rotisserie/eris"
"github.com/solo-io/gloo/jobs/pkg/certgen"
"github.com/solo-io/go-utils/contextutils"
"github.com/solo-io/k8s-utils/certutils"
"go.uber.org/zap"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
)
// TlsSecret bundles everything needed to build a TLS secret object: the
// secret's identity, the data keys to store each payload under, and the
// payloads themselves (see makeTlsSecret).
type TlsSecret struct {
	// SecretName and SecretNamespace identify the secret object.
	SecretName      string
	SecretNamespace string

	// Keys of the secret's Data map under which the private key, the
	// certificate and the CA bundle are stored, respectively.
	PrivateKeyFileName string
	CertFileName       string
	CaBundleFileName   string

	// Raw payloads written under the keys above.
	PrivateKey []byte
	Cert       []byte
	CaBundle   []byte
}
// GetExistingValidTlsSecret fetches the secret secretName in secretNamespace and decides whether
// its certificate chain can still be used for the service svcName in svcNamespace.
//
// Possible outcomes:
//   - (secret, false, nil): every cert in the chain is inside its validity window and at least one
//     of them is valid for the service.
//   - (secret, true, nil): a cert that is valid for the service expires within renewBeforeDuration,
//     i.e. the secret is still usable but should be rotated.
//   - (nil, false, nil): the secret does not exist, a cert in the chain is expired or not yet valid,
//     or no cert in the chain is valid for the service.
//   - (nil, false, err): the lookup failed for a reason other than NotFound, the secret is not of
//     type kubernetes.io/tls, or the cert chain could not be decoded.
//
// Certs are examined in chain order and the first conclusive cert decides the result.
func GetExistingValidTlsSecret(ctx context.Context, kube kubernetes.Interface, secretName string, secretNamespace string,
	svcName string, svcNamespace string, renewBeforeDuration time.Duration) (*v1.Secret, bool, error) {

	logger := contextutils.LoggerFrom(ctx)
	logger.Infow("looking for existing valid tls secret",
		zap.String("secretName", secretName),
		zap.String("secretNamespace", secretNamespace))

	existing, err := kube.CoreV1().Secrets(secretNamespace).Get(ctx, secretName, metav1.GetOptions{})
	switch {
	case err == nil:
		// found it, carry on with validation below
	case apierrors.IsNotFound(err):
		logger.Warnw("failed to retrieve existing secret",
			zap.String("secretName", secretName),
			zap.String("secretNamespace", secretNamespace))
		// a missing secret is the normal state on the first run, so it must not be
		// reported as an error or certgen would be short-circuited
		return nil, false, nil
	default:
		return nil, false, errors.Wrapf(err, "failed to retrieve existing secret")
	}

	if existing.Type != v1.SecretTypeTLS {
		return nil, false, errors.Errorf("unexpected secret type, expected %s and got %s", v1.SecretTypeTLS, existing.Type)
	}

	// decode the server cert(s)
	chain, err := decodeCertChain(existing.Data[v1.TLSCertKey])
	if err != nil {
		return nil, false, errors.Wrapf(err, "failed to decode cert chain")
	}
	logger.Infof("found %v certs", len(chain))

	now := time.Now().UTC()
	foundMatch := false
	for _, c := range chain {
		// a cert outside its validity window means requests are already failing,
		// so the secret must not be kept in use while rotating
		outsideWindow := now.Before(c.NotBefore) || now.After(c.NotAfter)
		if outsideWindow {
			logger.Info("cert is expired or not yet valid")
			return nil, false, nil
		}

		// certs that are not for this service neither qualify nor disqualify the secret
		if !certgen.ValidForService(c.DNSNames, svcName, svcNamespace) {
			continue
		}

		// valid for the service but inside the renewal window: keep using it while rotating
		renewFrom := c.NotAfter.Add(-renewBeforeDuration)
		if now.After(renewFrom) {
			logger.Info("cert is valid but expiring soon")
			return existing, true, nil
		}

		foundMatch = true
	}

	// require at least one cert to match service
	if !foundMatch {
		logger.Infow("cert is not valid for given service",
			zap.String("svcName", svcName),
			zap.String("svcNamespace", svcNamespace))
		return nil, false, nil
	}

	// cert is valid!
	logger.Info("existing cert is valid!")
	return existing, false, nil
}
// CreateTlsSecret writes the TLS secret described by secretCfg and returns the object the API
// server handed back.
//
// It first attempts a Create. If a secret with that name already exists, the existing object is
// fetched so its ResourceVersion can be reused, and the secret is overwritten with an Update.
// Any other failure (including a failing Get or Update on that fallback path) is returned wrapped.
func CreateTlsSecret(ctx context.Context, kube kubernetes.Interface, secretCfg TlsSecret) (*v1.Secret, error) {
	desired := makeTlsSecret(secretCfg)
	client := kube.CoreV1().Secrets(desired.Namespace)
	logger := contextutils.LoggerFrom(ctx)

	logger.Infow("creating TLS secret", zap.String("secret", desired.Name))
	created, createErr := client.Create(ctx, desired, metav1.CreateOptions{})
	if createErr == nil {
		return created, nil
	}
	if !apierrors.IsAlreadyExists(createErr) {
		return nil, errors.Wrapf(createErr, "failed creating secret")
	}

	// the secret is already there: replace its contents instead
	logger.Infow("existing TLS secret found, attempting to update",
		zap.String("secretName", desired.Name),
		zap.String("secretNamespace", desired.Namespace))

	current, getErr := client.Get(ctx, desired.Name, metav1.GetOptions{})
	if getErr != nil {
		return nil, errors.Wrapf(getErr, "failed to retrieve existing secret after receiving AlreadyExists error on Create")
	}

	// carry over the resource version of the object we just read so the update targets it
	desired.ResourceVersion = current.ResourceVersion
	updated, updateErr := client.Update(ctx, desired, metav1.UpdateOptions{})
	if updateErr != nil {
		return nil, errors.Wrapf(updateErr, "failed updating existing secret")
	}
	return updated, nil
}
// RotateCerts rotates certs in a few steps.
//
// We start with:
//   - The current secret (currentTlsSecret) which will be rotated out. It initially
//     contains the current server cert/key and ca bundle.
//   - The newly generated certs (nextCerts) which we will switch over to.
//
// The update is done in the following order:
//  1. Set current secret's ca bundle to the current + next ca bundle (so both CAs are accepted temporarily)
//  2. Wait for the change to propagate
//  3. Set the current secret's server cert and private key to those of the newly generated certs
//  4. Wait for the change to propagate
//  5. Set the current secret's ca bundle to the next ca bundle. Now it contains only the next server
//     cert and next ca bundle and the old ones are no longer supported.
//
// Each "wait" lasts gracePeriod, or less if ctx is done first (see waitGracePeriod).
//
// The returned secret is the object built locally for the final write, not the API server's
// response. An error is returned if nextCerts is nil or if any of the three updates fails; in the
// latter case the secret is left in whatever intermediate state had been persisted so far.
//
// The caller's currentTlsSecret.CaBundle bytes are never modified.
func RotateCerts(ctx context.Context,
	kubeClient kubernetes.Interface,
	currentTlsSecret TlsSecret,
	nextCerts *certutils.Certificates,
	gracePeriod time.Duration) (*v1.Secret, error) {

	if nextCerts == nil {
		return nil, errors.Errorf("cannot rotate certs: next certs are nil")
	}

	logger := contextutils.LoggerFrom(ctx)
	logger.Infow("rotating secret", zap.String("secretName", currentTlsSecret.SecretName), zap.String("secretNamespace", currentTlsSecret.SecretNamespace))
	secretClient := kubeClient.CoreV1().Secrets(currentTlsSecret.SecretNamespace)

	// set secret's caBundle to the combination of current ca + next ca, and persist changes.
	// The combined bundle is assembled in a fresh slice: appending directly to the caller's
	// CaBundle could write into its backing array when it has spare capacity.
	bothCaBundles := make([]byte, 0, len(currentTlsSecret.CaBundle)+len(nextCerts.CaCertificate)+1)
	bothCaBundles = append(bothCaBundles, currentTlsSecret.CaBundle...)
	// PEM blocks must start on their own line; without this a bundle lacking a trailing newline
	// would be glued to the next BEGIN line and the combined bundle would not parse.
	if n := len(bothCaBundles); n > 0 && bothCaBundles[n-1] != '\n' {
		bothCaBundles = append(bothCaBundles, '\n')
	}
	bothCaBundles = append(bothCaBundles, nextCerts.CaCertificate...)
	currentTlsSecret.CaBundle = bothCaBundles

	secretToWrite := makeTlsSecret(currentTlsSecret)
	logger.Info("updating to both ca bundles")
	_, err := secretClient.Update(ctx, secretToWrite, metav1.UpdateOptions{})
	if err != nil {
		return nil, errors.Wrapf(err, "Failed updating to both ca bundles")
	}

	// wait for pods to pick up the ca bundle change
	logger.Info("waiting for ca bundle changes to be picked up")
	waitGracePeriod(ctx, gracePeriod, "ca bundles update")

	// set serverCert to next and persist secret
	currentTlsSecret.Cert = nextCerts.ServerCertificate
	currentTlsSecret.PrivateKey = nextCerts.ServerCertKey
	secretToWrite = makeTlsSecret(currentTlsSecret)
	logger.Info("updating to new server cert")
	_, err = secretClient.Update(ctx, secretToWrite, metav1.UpdateOptions{})
	if err != nil {
		return nil, errors.Wrapf(err, "Failed updating to new server cert")
	}

	// wait for pods to pick up the server cert change
	logger.Info("waiting for server cert changes to be picked up")
	waitGracePeriod(ctx, gracePeriod, "cert update")

	// set currentSecret's caBundle to next (now currentSecret contains only next ca and next serverCert) and persist currentSecret
	currentTlsSecret.CaBundle = nextCerts.CaCertificate
	secretToWrite = makeTlsSecret(currentTlsSecret)
	logger.Info("updating to new ca bundle")
	_, err = secretClient.Update(ctx, secretToWrite, metav1.UpdateOptions{})
	if err != nil {
		return nil, errors.Wrapf(err, "Failed updating to new ca bundle")
	}

	// return the updated secret
	logger.Info("secret has been updated")
	return secretToWrite, nil
}
// waitGracePeriod blocks until gracePeriod has elapsed or ctx is done, whichever happens first.
//
// description is an informative message about what we are waiting for; it is attached to every
// log line as the "waitingFor" field.
//
// Progress is checked once per second, so the function returns on the first tick that falls
// after the deadline (it can overshoot gracePeriod by up to a second). The remaining time is
// logged whenever the whole number of seconds left is a multiple of 5.
//
// Nothing is returned: callers cannot distinguish a completed wait from a cancelled one.
func waitGracePeriod(ctx context.Context, gracePeriod time.Duration, description string) {
	logger := contextutils.LoggerFrom(ctx).With(zap.String("waitingFor", description))

	ticker := time.NewTicker(1 * time.Second)
	// release the ticker's resources on every exit path; it is otherwise never stopped
	defer ticker.Stop()

	end := time.Now().Add(gracePeriod)
	logger.Infof("Starting a grace period for all pods to settle: %v seconds remaining", int(time.Until(end).Seconds()))
	for {
		select {
		case <-ctx.Done():
			logger.Info("context cancelled, next rotation will not break trust, consider rotating an extra time")
			return
		case t := <-ticker.C:
			if t.After(end) {
				logger.Info("finished waiting for pods to settle")
				return
			}
			// find the remaining integer amount of seconds remaining
			secRemains := int(end.Sub(t).Seconds())
			if secRemains%5 == 0 {
				logger.Infof("%v seconds remaining", secRemains)
			}
		}
	}
}
// makeTlsSecret converts args into a secret object of type kubernetes.io/tls.
// Only the name, namespace, type and data are populated; the private key, cert and
// CA bundle are stored under the data keys named by the corresponding *FileName fields.
func makeTlsSecret(args TlsSecret) *v1.Secret {
	// filled in the order key, cert, ca bundle so that, should two file names
	// coincide, the later entry is the one that ends up in the map
	payload := make(map[string][]byte, 3)
	payload[args.PrivateKeyFileName] = args.PrivateKey
	payload[args.CertFileName] = args.Cert
	payload[args.CaBundleFileName] = args.CaBundle

	meta := metav1.ObjectMeta{
		Name:      args.SecretName,
		Namespace: args.SecretNamespace,
	}

	return &v1.Secret{
		ObjectMeta: meta,
		Type:       v1.SecretTypeTLS,
		Data:       payload,
	}
}
// decodeCertChain parses every certificate found in the PEM-encoded chain, in order.
//
// PEM blocks whose type is not "CERTIFICATE" (for example a private key that was bundled into
// the same data) are skipped rather than handed to the certificate parser, where they would make
// the whole chain fail to parse. Anything after the last decodable PEM block is ignored.
//
// Input containing no certificate blocks (including nil or empty input) yields an empty result
// and a nil error. An error is returned only when the contents of a CERTIFICATE block are not a
// parseable certificate.
func decodeCertChain(chain []byte) ([]*x509.Certificate, error) {
	// concatenated DER of all certificate blocks; x509.ParseCertificates expects exactly that
	var der []byte
	rest := chain
	for {
		var block *pem.Block
		block, rest = pem.Decode(rest)
		if block == nil {
			break
		}
		if block.Type != "CERTIFICATE" {
			continue
		}
		der = append(der, block.Bytes...)
	}
	return x509.ParseCertificates(der)
}