Skip to content

Commit

Permalink
Merge 83b47a8 into 37199f2
Browse files Browse the repository at this point in the history
  • Loading branch information
cpu committed Jun 29, 2018
2 parents 37199f2 + 83b47a8 commit 6239df8
Show file tree
Hide file tree
Showing 6 changed files with 375 additions and 26 deletions.
18 changes: 8 additions & 10 deletions monitor/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type monitorCTClient interface {
GetSTH(context.Context) (*ct.SignedTreeHead, error)
AddChain(context.Context, []ct.ASN1Cert) (*ct.SignedCertificateTimestamp, error)
AddPreChain(context.Context, []ct.ASN1Cert) (*ct.SignedCertificateTimestamp, error)
GetSTHConsistency(context.Context, uint64, uint64) ([][]byte, error)
}

// MonitorOptions is a struct for holding monitor configuration options
Expand Down Expand Up @@ -124,16 +125,13 @@ func New(opts MonitorOptions, logger *log.Logger, clk clock.Clock) (*Monitor, er
}

if opts.FetchOpts != nil {
m.fetcher = &sthFetcher{
logger: logger,
clk: clk,
stats: sthStats,
client: client,
logURI: opts.LogURI,
stopChannel: make(chan bool),
sthFetchInterval: opts.FetchOpts.Interval,
sthTimeout: opts.FetchOpts.Timeout,
}
m.fetcher = newSTHFetcher(
logger,
clk,
client,
opts.LogURI,
opts.FetchOpts.Interval,
opts.FetchOpts.Timeout)
}

if opts.SubmitOpts != nil {
Expand Down
15 changes: 14 additions & 1 deletion monitor/monitor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,22 @@ func (c errorClient) AddChain(_ context.Context, _ []ct.ASN1Cert) (*ct.SignedCer
return nil, errors.New("ct-log doesn't want any chains")
}

// AddPreChain mocked to always return an error
func (c errorClient) AddPreChain(_ context.Context, _ []ct.ASN1Cert) (*ct.SignedCertificateTimestamp, error) {
return nil, errors.New("ct-log doesn't want any prechains")
}

// GetSTHConsistency mocked to always return an error
func (c errorClient) GetSTHConsistency(_ context.Context, _ uint64, _ uint64) ([][]byte, error) {
return nil, errors.New("ct-log wants you to take its word that it is consistent")
}

// mockClient is a type implementing the monitorCTClient interface that always
// returns a fixed mock STH from `GetSTH` and a mock SCT from `AddChain`
// returns a mock STH from `GetSTH`, a mock SCT from `AddChain`, and a mock
// proof from `GetSTHConsistency`
type mockClient struct {
timestamp time.Time
proof [][]byte
}

// GetSTH mocked to always return a fixed mock STH
Expand All @@ -181,3 +189,8 @@ func (c mockClient) AddPreChain(_ context.Context, _ []ct.ASN1Cert) (*ct.SignedC
Timestamp: uint64(ts),
}, nil
}

// GetSTHConsistency mocked to always return a fixed consistency proof
func (c mockClient) GetSTHConsistency(_ context.Context, _ uint64, _ uint64) ([][]byte, error) {
return c.proof, nil
}
215 changes: 201 additions & 14 deletions monitor/sth_fetcher.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
package monitor

import (
"bytes"
"context"
"errors"
"fmt"
"log"
"sync"
"time"

ct "github.com/google/certificate-transparency-go"
"github.com/google/trillian/merkle"
"github.com/google/trillian/merkle/rfc6962"
"github.com/jmhodges/clock"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
Expand All @@ -14,10 +20,12 @@ import (
// sthFetchStats is a type to hold the prometheus metrics used by
// a sthFetcher
type sthFetchStats struct {
sthTimestamp *prometheus.GaugeVec
sthAge *prometheus.GaugeVec
sthFailures *prometheus.CounterVec
sthLatency *prometheus.HistogramVec
sthTimestamp *prometheus.GaugeVec
sthAge *prometheus.GaugeVec
sthFailures *prometheus.CounterVec
sthLatency *prometheus.HistogramVec
sthProofLatency *prometheus.HistogramVec
sthInconsistencies *prometheus.CounterVec
}

// sthStats is a sthFetchStats instance with promauto registered
Expand All @@ -40,6 +48,15 @@ var sthStats *sthFetchStats = &sthFetchStats{
Help: "Latency observing CT log signed tree head (STH)",
Buckets: internetFacingBuckets,
}, []string{"uri"}),
sthProofLatency: promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "sth_proof_latency",
Help: "Latency requesting CT signed tree head (STH) consistency proof",
Buckets: internetFacingBuckets,
}, []string{"uri"}),
sthInconsistencies: promauto.NewCounterVec(prometheus.CounterOpts{
Name: "sth_inconsistencies",
Help: "Count of times two CT log signed tree heads (STHs) could not be proved consistent",
}, []string{"uri", "type"}),
}

// FetcherOptions is a struct holding options for STH fetching.
Expand All @@ -62,6 +79,13 @@ func (o FetcherOptions) Valid() error {
return nil
}

// sthFetcherVerifier is an interface that specifies the merkle.LogVerifier
// functions that the sthFetcher uses. This interface allows for easy
// shimming of client methods with mock implementations for unit testing.
type sthFetcherVerifier interface {
VerifyConsistencyProof(int64, int64, []byte, []byte, [][]byte) error
}

// sthFetcher is a type for periodically fetching a log's STH and publishing
// metrics about it.
type sthFetcher struct {
Expand All @@ -75,8 +99,66 @@ type sthFetcher struct {

// How long to sleep between fetching the log's current STH
sthFetchInterval time.Duration
// How long to wait before giving up on an STH fetch
// How long to wait before giving up on an STH fetch or an STH consistency
// proof.
sthTimeout time.Duration

// prevSTH is the last STH that was fetched from the log
prevSTH *ct.SignedTreeHead

// sthLock is a Mutex for controlling updates to prevSTH
sthLock sync.Mutex

// verifier is used by verifySTHConsistency to prove consistency between two
// STHs
verifier sthFetcherVerifier
}

// newSTHFetcher returns an sthFetcher instance populated based on the provided
// arguments
func newSTHFetcher(
logger *log.Logger,
clk clock.Clock,
client monitorCTClient,
logURI string,
sthFetchInterval time.Duration,
sthTimeout time.Duration) *sthFetcher {
return &sthFetcher{
logger: logger,
clk: clk,
stats: sthStats,
client: client,
logURI: logURI,
stopChannel: make(chan bool),
sthFetchInterval: sthFetchInterval,
sthTimeout: sthTimeout,
verifier: merkle.NewLogVerifier(rfc6962.DefaultHasher),
}
}

// logErrorf formats a message to the sthFetcher's logger prefixed to identify
// that an error occurred, that it was an sth-fetcher, and the logURI the error
// affects
func (f *sthFetcher) logErrorf(format string, args ...interface{}) {
// TODO(@cpu): We should be using os.Stderr here but doing so will mean
// changing integration tests. See
// https://github.com/letsencrypt/ct-woodpecker/issues/36
f.logger.Printf(
fmt.Sprintf("[ERROR] sth-fetcher %s : %s\n", f.logURI, format),
args...)
}

// logf formats a message to write to the sthFetcher's logger prefixed to
// identify the source as an sth-fetcher for a specific logURI
func (f *sthFetcher) logf(format string, args ...interface{}) {
f.logger.Printf(fmt.Sprintf("sth-fetcher %s : %s\n", f.logURI, format), args...)
}

// log writes a message to the sthFetcher's logger prefixed to identify the
// source as an sth-fetcher for a specific logURI. For format string usage see
// logf.
func (f *sthFetcher) log(msg string) {
f.logger.Printf("sth-fetcher %s : %s\n", f.logURI, msg)
}

// Run starts the log STH fetching process by creating a goroutine that will loop
Expand All @@ -85,7 +167,7 @@ func (f *sthFetcher) run() {
go func() {
for {
go f.observeSTH()
f.logger.Printf("Sleeping for %s before next STH check\n", f.sthFetchInterval)
f.logf("Sleeping for %s before next STH check\n", f.sthFetchInterval)
select {
case <-f.stopChannel:
return
Expand All @@ -96,7 +178,7 @@ func (f *sthFetcher) run() {
}

func (f *sthFetcher) stop() {
f.logger.Printf("Stopping %s sthFetcher", f.logURI)
f.log("Stopping")
f.stopChannel <- true
}

Expand All @@ -105,28 +187,133 @@ func (f *sthFetcher) stop() {
// since the STH's timestamp is published to the `sth_age` metric. If an error
// occurs the `sth_failures` metric will be incremented. If the operation
// succeeds then the `sth_timestamp` gauge will be updated to the returned STH's
// timestamp.
// timestamp. The newly observerd STH will be stored as `f.prevSTH`. If
// `f.prevSTH` is not nil, then `observeSTH` will asynchronously validate
// consistency between `f.prevSTH` and the newly observed STH.
func (f *sthFetcher) observeSTH() {
labels := prometheus.Labels{"uri": f.logURI}
f.logger.Printf("Fetching STH for %q\n", f.logURI)
f.log("Fetching STH")

start := f.clk.Now()
ctx, cancel := context.WithTimeout(context.Background(), f.sthTimeout)
defer cancel()
sth, err := f.client.GetSTH(ctx)
newSTH, err := f.client.GetSTH(ctx)
elapsed := f.clk.Since(start)
f.stats.sthLatency.With(labels).Observe(elapsed.Seconds())

if err != nil {
f.logger.Printf("!! Error fetching STH: %s\n", err.Error())
f.logErrorf("failed to fetch STH: %q : %#v", err.Error(), err)
f.stats.sthFailures.With(labels).Inc()
return
}

f.stats.sthTimestamp.With(labels).Set(float64(sth.Timestamp))
ts := time.Unix(0, int64(sth.Timestamp)*int64(time.Millisecond))
f.stats.sthTimestamp.With(labels).Set(float64(newSTH.Timestamp))
ts := time.Unix(0, int64(newSTH.Timestamp)*int64(time.Millisecond))
sthAge := f.clk.Since(ts)
f.stats.sthAge.With(labels).Set(sthAge.Seconds())

f.logger.Printf("STH for %q verified. Timestamp: %s Age: %s\n", f.logURI, ts, sthAge)
f.logf("STH verified. Timestamp: %s Age: %s TreeSize: %d Root Hash: %x",
ts, sthAge, newSTH.TreeSize, newSTH.SHA256RootHash)

f.sthLock.Lock()
defer f.sthLock.Unlock()

if f.prevSTH != nil {
go f.verifySTHConsistency(f.prevSTH, newSTH)
}
f.prevSTH = newSTH
}

// verifySTHConsistency fetches and validates a consistency proof between
// firstSTH and secondSTH. If the two STHs don't verify then the
// `sth_inconsistencies` prometheus counter is incremented with a label
// indicating the category of inconsistency and an error is logged with
// `logErrorf`. Presently there are three possible categories of STH consistency
// failure:
// 1. "equal-treesize-inequal-hash" - the two STHs are the same tree size but
// have different root hashes.
// 2. "failed-to-get-proof" - the monitor encountered an error getting
// a consistency proof between the two STHs from the log.
// 3. "failed-to-verify-proof" - the monitor returned a proof of consistency
// between the two STHs that did not verify.
// When the monitor fetches a consistency proof from the log it publishes the
// latency of the operation to the `sth_proof_latency` prometheus histogram.
func (f *sthFetcher) verifySTHConsistency(firstSTH, secondSTH *ct.SignedTreeHead) {
if firstSTH == nil || secondSTH == nil {
f.logErrorf("firstSTH or secondSTH was nil")
return
}

firstTreeSize := firstSTH.TreeSize
firstHash := firstSTH.SHA256RootHash[:]

secondTreeSize := secondSTH.TreeSize
secondHash := secondSTH.SHA256RootHash[:]

// If the two STH's have equal tree sizes then we expect the SHA256RootHash to
// match. If it doesn't match there is no need to check the consistency proofs
// because the log is definitely inconsistent. In this case publish an
// increment to the `sthInconsistencies` stat
if firstTreeSize == secondTreeSize && !bytes.Equal(firstHash, secondHash) {
errorLabels := prometheus.Labels{"uri": f.logURI, "type": "equal-treesize-inequal-hash"}
f.stats.sthInconsistencies.With(errorLabels).Inc()
f.logErrorf("first STH and second STH have same tree size (%d) "+
"but different tree hashes. first.SHA256RootHash: %x "+
"second.SHA256RootHash: %x",
firstTreeSize,
firstHash,
secondHash)
return
} else if firstTreeSize == secondTreeSize {
// If the two STH's have equal tree sizes and equal SHA256RootHashes there
// isn't anything to do. We need STH's from two different tree states to
// verify consistency
f.logf("first STH and second STH have same SHA256RootHash (%x) "+
"and tree size (%d). No consistency proof required",
firstHash, firstTreeSize)
return
}

// proofDescription is used in log messages to describe the proof being
// fetched/verified
proofDescription := fmt.Sprintf(
"from treesize %d (hash %x) to treesize %d (hash %x)",
firstTreeSize, firstHash, secondTreeSize, secondHash)

// Fetch the consistency proof between the two tree sizes from the log.
// Observe the latency of this operation using the `sthProofLatency` stat. If
// the operation fails, consider it an inconsistency and publish an increment
// to the `sthInconsistencies` stat.
ctx, cancel := context.WithTimeout(context.Background(), f.sthTimeout)
defer cancel()
start := f.clk.Now()
f.logf("Getting consistency proof %s", proofDescription)
consistencyProof, err := f.client.GetSTHConsistency(ctx, firstTreeSize, secondTreeSize)
elapsed := f.clk.Since(start)
labels := prometheus.Labels{"uri": f.logURI}
f.stats.sthProofLatency.With(labels).Observe(elapsed.Seconds())
if err != nil {
errorLabels := prometheus.Labels{"uri": f.logURI, "type": "failed-to-get-proof"}
f.stats.sthInconsistencies.With(errorLabels).Inc()
f.logErrorf("failed to get consistency proof %s: %q : %#v",
proofDescription, err.Error(), err)
return
}

// Verify the consistency proof. If the proof fails to verify then publish an
// increment to the `sthInconsistencies` stat
if err := f.verifier.VerifyConsistencyProof(
int64(firstTreeSize),
int64(secondTreeSize),
firstHash,
secondHash,
consistencyProof); err != nil {
errorLabels := prometheus.Labels{"uri": f.logURI, "type": "failed-to-verify-proof"}
f.stats.sthInconsistencies.With(errorLabels).Inc()
f.logErrorf("failed to verify consistency proof %s: %q : %#v",
proofDescription, err.Error(), err)
return
}

f.logf("verified consistency proof %s", proofDescription)
}

0 comments on commit 6239df8

Please sign in to comment.