-
-
Notifications
You must be signed in to change notification settings - Fork 586
/
redis_source.go
188 lines (169 loc) · 6.88 KB
/
redis_source.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
// Package redis provides a Redis-based OCSP responder.
//
// This responder will first look for a response cached in Redis. If there is
// no response, or the response is too old, it will make a request to the RA
// for a freshly-signed response. If that succeeds, this responder will return
// the response to the user right away, while storing a copy to Redis in a
// separate goroutine.
//
// If the response was too old, but the request to the RA failed, this
// responder will serve the response anyhow. This allows for graceful
// degradation: it is better to serve a response that is 5 days old (outside
// the Baseline Requirements limits) than to serve no response at all.
// It's assumed that this will be wrapped in a responder.filterSource, which
// means that if a response is past its NextUpdate, we'll generate a 500.
package redis
import (
"context"
"errors"
"time"
"github.com/jmhodges/clock"
"github.com/letsencrypt/boulder/core"
blog "github.com/letsencrypt/boulder/log"
"github.com/letsencrypt/boulder/ocsp/responder"
"github.com/letsencrypt/boulder/rocsp"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/crypto/ocsp"
berrors "github.com/letsencrypt/boulder/errors"
)
// rocspClient is the read/write interface to the Redis OCSP cache that
// redisSource depends on; *rocsp.RWClient satisfies it.
type rocspClient interface {
// GetResponse returns the raw bytes of the cached OCSP response for the
// given serial (parseable by ocsp.ParseResponse), or an error such as
// rocsp.ErrRedisNotFound on a cache miss.
GetResponse(ctx context.Context, serial string) ([]byte, error)
// StoreResponse writes a parsed OCSP response to the cache.
StoreResponse(ctx context.Context, resp *ocsp.Response) error
}
// redisSource is a responder.Source backed by a Redis cache, falling back to
// live signing (via signer) when the cache misses, errors, or is stale.
type redisSource struct {
// client reads/writes the Redis cache; may be nil if no client was
// provided to NewRedisSource.
client rocspClient
// signer produces freshly-signed responses on cache miss or staleness.
signer responder.Source
// counter tracks cache lookup outcomes, by "result" label.
counter *prometheus.CounterVec
// signAndSaveCounter tracks live-signing outcomes, by "cause" and "result".
signAndSaveCounter *prometheus.CounterVec
// cachedResponseAges records the age of cached responses when retrieved.
cachedResponseAges prometheus.Histogram
clk clock.Clock
// liveSigningPeriod is the maximum age (since ThisUpdate) before a cached
// response is considered stale and re-signed.
liveSigningPeriod time.Duration
// Error logs will be emitted at a rate of 1 in logSampleRate.
// If logSampleRate is 0, no logs will be emitted.
logSampleRate int
// Note: this logger is not currently used, as all audit log events are from
// the dbSource right now, but it should and will be used in the future.
log blog.Logger
}
// NewRedisSource returns a responder.Source which will look up OCSP responses in a
// Redis table. liveSigningPeriod is the maximum cached-response age before a
// fresh signature is requested; logSampleRate controls error-log sampling
// (1-in-N; 0 disables logging). The metrics are registered on stats.
func NewRedisSource(
	client *rocsp.RWClient,
	signer responder.Source,
	liveSigningPeriod time.Duration,
	clk clock.Clock,
	stats prometheus.Registerer,
	log blog.Logger,
	logSampleRate int,
) (*redisSource, error) {
	counter := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "ocsp_redis_responses",
		Help: "Count of OCSP requests/responses by action taken by the redisSource",
	}, []string{"result"})
	stats.MustRegister(counter)

	signAndSaveCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "ocsp_redis_sign_and_save",
		Help: "Count of OCSP sign and save requests",
	}, []string{"cause", "result"})
	stats.MustRegister(signAndSaveCounter)

	// Set up 12-hour-wide buckets, measured in seconds.
	buckets := make([]float64, 14)
	for i := range buckets {
		buckets[i] = 43200 * float64(i)
	}

	cachedResponseAges := prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "ocsp_redis_cached_response_ages",
		Help:    "How old are the cached OCSP responses when we successfully retrieve them.",
		Buckets: buckets,
	})
	stats.MustRegister(cachedResponseAges)

	// Only assign a non-nil client into the interface variable: assigning a
	// nil *rocsp.RWClient directly would yield a non-nil interface value
	// (the typed-nil trap), defeating nil checks on src.client.
	var rocspReader rocspClient
	if client != nil {
		rocspReader = client
	}

	return &redisSource{
		client:             rocspReader,
		signer:             signer,
		counter:            counter,
		signAndSaveCounter: signAndSaveCounter,
		cachedResponseAges: cachedResponseAges,
		liveSigningPeriod:  liveSigningPeriod,
		clk:                clk,
		// Bug fix: logSampleRate was previously dropped here, leaving the
		// field at its zero value, which disables SampledError logging.
		logSampleRate: logSampleRate,
		log:           log,
	}, nil
}
// Response implements the responder.Source interface. It looks up the
// requested OCSP response in the redis cluster. On a cache miss or lookup
// error it falls back to live signing; when the cached response is stale it
// requests a fresh signature rather than serving the old one.
func (src *redisSource) Response(ctx context.Context, req *ocsp.Request) (*responder.Response, error) {
	serial := core.SerialToString(req.SerialNumber)

	cached, err := src.client.GetResponse(ctx, serial)
	if err != nil {
		if errors.Is(err, rocsp.ErrRedisNotFound) {
			src.counter.WithLabelValues("not_found").Inc()
		} else {
			src.counter.WithLabelValues("lookup_error").Inc()
			responder.SampledError(src.log, src.logSampleRate, "looking for cached response: %s", err)
			// Proceed despite the error; when Redis is down we'd like to limp along with live signing
			// rather than returning an error to the client.
		}
		return src.signAndSave(ctx, req, causeNotFound)
	}

	parsed, err := ocsp.ParseResponse(cached, nil)
	if err != nil {
		src.counter.WithLabelValues("parse_error").Inc()
		return nil, err
	}

	if !src.isStale(parsed) {
		src.counter.WithLabelValues("success").Inc()
		return &responder.Response{Response: parsed, Raw: cached}, nil
	}

	src.counter.WithLabelValues("stale").Inc()
	// Note: we could choose to return the stale response (up to its actual
	// NextUpdate date), but if we pass the BR/root program limits, that
	// becomes a compliance problem; returning an error is an availability
	// problem and only becomes a compliance problem if we serve too many
	// of them for too long (the exact conditions are not clearly defined
	// by the BRs or root programs).
	return src.signAndSave(ctx, req, causeStale)
}
// isStale reports whether resp's ThisUpdate is older than the configured
// liveSigningPeriod. As a side effect, it records the response's age in the
// cachedResponseAges histogram.
func (src *redisSource) isStale(resp *ocsp.Response) bool {
	elapsed := src.clk.Since(resp.ThisUpdate)
	src.cachedResponseAges.Observe(elapsed.Seconds())
	return elapsed > src.liveSigningPeriod
}
// signAndSaveCause describes why a live signing was requested; it becomes
// the "cause" label on the ocsp_redis_sign_and_save metric.
type signAndSaveCause string
const (
// causeStale: the cached response was older than liveSigningPeriod.
causeStale signAndSaveCause = "stale"
// causeNotFound: no cached response was found (or the lookup errored).
causeNotFound signAndSaveCause = "not_found"
// causeMismatch: not referenced in this file; presumably used by another
// caller of signAndSave — TODO confirm.
causeMismatch signAndSaveCause = "mismatch"
)
// signAndSave asks the signer for a freshly-signed response for req. On
// success it returns the response immediately while storing a copy to Redis
// in a background goroutine. The cause is recorded as a metric label.
func (src *redisSource) signAndSave(ctx context.Context, req *ocsp.Request, cause signAndSaveCause) (*responder.Response, error) {
	fresh, err := src.signer.Response(ctx, req)
	switch {
	case errors.Is(err, responder.ErrNotFound):
		src.signAndSaveCounter.WithLabelValues(string(cause), "certificate_not_found").Inc()
		return nil, responder.ErrNotFound
	case errors.Is(err, berrors.UnknownSerial):
		// UnknownSerial is more interesting than NotFound, because it means we don't
		// have a record in the `serials` table, which is kept longer-term than the
		// `certificateStatus` table. That could mean someone is making up silly serial
		// numbers in their requests to us, or it could mean there's site on the internet
		// using a certificate that we don't have a record of in the `serials` table.
		src.signAndSaveCounter.WithLabelValues(string(cause), "unknown_serial").Inc()
		responder.SampledError(src.log, src.logSampleRate, "unknown serial: %s", core.SerialToString(req.SerialNumber))
		return nil, responder.ErrNotFound
	case err != nil:
		src.signAndSaveCounter.WithLabelValues(string(cause), "signing_error").Inc()
		return nil, err
	}
	src.signAndSaveCounter.WithLabelValues(string(cause), "signing_success").Inc()
	go func() {
		// Best-effort store: if it fails we'll simply sign again on the next
		// request. Uses context.Background() so the write is not canceled
		// along with the request's ctx.
		_ = src.client.StoreResponse(context.Background(), fresh.Response)
	}()
	return fresh, nil
}