Skip to content

Commit

Permalink
Merge be93dad into b4e483d
Browse files Browse the repository at this point in the history
  • Loading branch information
beautifulentropy committed Mar 7, 2021
2 parents b4e483d + be93dad commit 013ba73
Show file tree
Hide file tree
Showing 15 changed files with 942 additions and 0 deletions.
68 changes: 68 additions & 0 deletions cmd/boulder-observer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# boulder-observer
A modular, config-driven approach to black-box monitoring with Prometheus.


## Usage

### Help
```shell
$ go run ./cmd/boulder-observer/main.go -help
main:
-config string
Path to boulder-observer configuration file (default "config.yaml")
```

### Starting the boulder-observer daemon
```shell
$ go run ./cmd/boulder-observer/main.go -config test/config-next/observer.yml
I181830 main ksKu7w4 Versions: main=(Unspecified Unspecified) Golang=(go1.15.7) BuildHost=(Unspecified)
I181830 main q_D84gk Initializing boulder-observer daemon from config: test/config-next/observer.yml
I181832 main 34Ccpgs status=[success] probe=[HTTP] duration=[128.386914ms] monitor=[http://letsencrypt.org-200]
I181832 main 0buu-wI status=[success] probe=[HTTP] duration=[148.592537ms] monitor=[https://letsencrypt.org-200]
I181834 main 1bL9-A0 status=[success] probe=[HTTP] duration=[24.501939ms] monitor=[https://letsencrypt.org-200]
I181834 main z-m2mAc status=[success] probe=[HTTP] duration=[48.078282ms] monitor=[http://letsencrypt.org-200]
I181835 main 1I-QuwM status=[success] probe=[DNS] duration=[5.318966ms] monitor=[udp-1.1.1.1:53-google.com-A]
I181835 main puyqZgA status=[success] probe=[DNS] duration=[8.766023ms] monitor=[tcp-1.1.1.1:53-google.com-A]
I181835 main 5cWimwc status=[success] probe=[DNS] duration=[15.923062ms] monitor=[udp-owen.ns.cloudflare.com:53-letsencrypt.org-A]
I181835 main jsjXxQ0 status=[success] probe=[DNS] duration=[19.95004ms] monitor=[tcp-owen.ns.cloudflare.com:53-letsencrypt.org-A]
```

## Configuration

### Observer
```yaml
debugaddr: :8040
syslog:
  stdoutlevel: 6
  sysloglevel: 6
monitors:
  -
    ...
```

### Monitors

#### Configuring a DNS monitor
```yaml
monitors:
  -
    period: 10s
    type: DNS
    settings:
      protocol: tcp
      server: 8.8.8.8:53
      recurse: true
      query_name: google.com
      query_type: A
```

#### Configuring an HTTP monitor
```yaml
monitors:
  -
    period: 2s
    type: HTTP
    settings:
      url: https://letsencrypt.org
      rcode: 200
```
48 changes: 48 additions & 0 deletions cmd/boulder-observer/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package main

import (
"errors"
"flag"
"io/ioutil"

"github.com/letsencrypt/boulder/cmd"
"github.com/letsencrypt/boulder/observer"
"gopkg.in/yaml.v2"
)

// main is the entry point of the boulder-observer daemon. It reads the
// YAML file named by the -config flag, decodes it into an
// observer.ObsConf, sets up stats and logging from that config,
// validates the config, and finally starts the observer.
func main() {
	cfgFile := flag.String(
		"config", "config.yaml", "Path to boulder-observer configuration file")
	flag.Parse()

	raw, err := ioutil.ReadFile(*cfgFile)
	cmd.FailOnError(err, "failed to read config file")

	// Decode the YAML document into the observer config.
	var conf observer.ObsConf
	if err = yaml.Unmarshal(raw, &conf); err != nil {
		cmd.FailOnError(err, "failed to parse yaml config")
	}

	// The debug address is handed to cmd.StatsAndLogging below, so a
	// missing value is reported through cmd.FailOnError first.
	if len(conf.DebugAddr) == 0 {
		cmd.FailOnError(errors.New(""), "debugaddr is not defined")
	}

	// Set up stats and logging before anything else gets logged.
	stats, log := cmd.StatsAndLogging(conf.Syslog, conf.DebugAddr)
	defer log.AuditPanic()
	log.Info(cmd.VersionString())

	// Validate the observer config and each of its monitors.
	if err = conf.Validate(log); err != nil {
		cmd.FailOnError(err, "config failed validation")
	}

	// Build the observer and hand control over to it.
	log.Infof("Initializing boulder-observer daemon from config: %s", *cfgFile)
	log.Debugf("Using config: %+v", conf)
	observer.New(conf, log, stats).Start()
}
58 changes: 58 additions & 0 deletions observer/mon_conf.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package observer

import (
"fmt"
"strings"

"github.com/letsencrypt/boulder/cmd"
p "github.com/letsencrypt/boulder/observer/probes"
"gopkg.in/yaml.v2"
)

// settings holds the probe-specific `settings` section of a monitor
// config as a generic key/value map.
type settings map[string]interface{}

// MonConf is exported to receive the yaml configuration of a single
// monitor.
type MonConf struct {
// Valid is set to true by validate once the probe settings have been
// unmarshalled and validated successfully.
// NOTE(review): this field carries no yaml tag, so yaml.v2 would also
// populate it from a `valid` key in the config — confirm that is intended.
Valid bool
// Period is read from the `period` key; its Duration becomes the
// monitor's period.
Period cmd.ConfigDuration `yaml:"period"`
// Timeout is read from the `timeout` key.
// NOTE(review): not referenced by any code visible here — confirm usage.
Timeout int `yaml:"timeout"`
// Probe is the probe kind, read from the `type` key.
Probe string `yaml:"type"`
// Settings holds the probe-specific settings, read from the `settings`
// key.
Settings settings `yaml:"settings"`
}

// normalize lowercases the configured probe kind in place.
//
// It takes a pointer receiver so that the change is stored on the
// caller's MonConf; with a value receiver the assignment would only
// touch a copy and be discarded on return.
//
// NOTE(review): confirm that the probe registry lookup is keyed by (or
// itself normalizes to) lowercase kinds.
func (c *MonConf) normalize() {
	c.Probe = strings.ToLower(c.Probe)
}

// unmashalProbeSettings asks p.GetProbeConf for the configurer matching
// c.Probe and populates it from c.Settings by round-tripping the
// settings map through YAML.
//
// It returns the populated configurer, or a nil configurer and an error
// when the lookup, the re-marshalling of the settings, or the
// unmarshalling into the configurer fails.
func (c MonConf) unmashalProbeSettings() (p.Configurer, error) {
	probeConf, err := p.GetProbeConf(c.Probe, c.Settings)
	if err != nil {
		return nil, err
	}

	// Re-encode the generic settings map so the probe can decode it
	// into its own config. A marshalling failure is reported rather
	// than silently passing empty bytes on to the probe.
	raw, err := yaml.Marshal(c.Settings)
	if err != nil {
		return nil, fmt.Errorf("marshalling settings for probe %q: %w", c.Probe, err)
	}

	probeConf, err = probeConf.UnmarshalSettings(raw)
	if err != nil {
		return nil, err
	}
	return probeConf, nil
}

// validate normalizes and validates the received monitor config.
//
// On success c.Valid is set to true and nil is returned. On failure
// c.Valid is false and the returned error describes why the probe
// settings could not be unmarshalled or did not pass the probe's own
// validation.
func (c *MonConf) validate() error {
	// Start from a known-invalid state. Valid has no yaml tag, so it can
	// already be true when validate is called (for example when decoded
	// from a `valid` key); it must not survive a failed validation.
	c.Valid = false

	c.normalize()
	probeConf, err := c.unmashalProbeSettings()
	if err != nil {
		return err
	}
	if err := probeConf.Validate(); err != nil {
		return fmt.Errorf("failed to validate probe: %s with settings: %+v due to: %w", c.Probe, probeConf, err)
	}
	c.Valid = true
	return nil
}

// getProber builds the prober described by this monitor config by
// unmarshalling the probe settings and converting the resulting
// configurer with AsProbe.
//
// It returns nil when the probe settings cannot be unmarshalled;
// callers must be prepared for a nil prober in that case.
func (c MonConf) getProber() p.Prober {
	probeConf, err := c.unmashalProbeSettings()
	if err != nil {
		// probeConf is nil on error; calling AsProbe on it would panic.
		return nil
	}
	return probeConf.AsProbe()
}
53 changes: 53 additions & 0 deletions observer/monitor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package observer

import (
"strconv"
"time"

blog "github.com/letsencrypt/boulder/log"
p "github.com/letsencrypt/boulder/observer/probes"
"github.com/prometheus/client_golang/prometheus"
)

// monitor contains the parsed, normalized, and validated configuration
// describing a given observer monitor
type monitor struct {
// valid mirrors MonConf.Valid; Observer.Start only starts monitors for
// which it is true.
valid bool
// period is the ticker interval and is also the duration passed to
// prober.Do on every tick.
period time.Duration
// prober performs the actual probe and supplies the name and type used
// for metric labels and log lines.
prober p.Prober
// logger receives one Infof line per probe run.
logger blog.Logger
// metric is the registerer handed to New.
// NOTE(review): it is stored but not read by the code visible here.
metric prometheus.Registerer
}

// start creates a ticker that fires once per configured period and
// launches a single goroutine that, on every tick, runs the prober with
// the period as its argument (presumably the probe timeout), records
// the elapsed time in the statObservations histogram, and logs the
// outcome. Probes run one after another inside that goroutine. The
// ticker is returned to the caller.
func (m monitor) start() *time.Ticker {
	tick := time.NewTicker(m.period)
	go func() {
		for range tick.C {
			ok, elapsed := m.prober.Do(m.period)

			// Record the probe duration, labelled by name, type and result.
			histogram := statObservations.WithLabelValues(
				m.prober.Name(), m.prober.Type(), strconv.FormatBool(ok))
			histogram.Observe(elapsed.Seconds())

			m.logger.Infof(
				"type=[%s] result=[%v] duration=[%f] name=[%s]",
				m.prober.Type(), ok, elapsed.Seconds(), m.prober.Name())
		}
	}()
	return tick
}

// New returns a pointer to a fresh monitor assembled from the monitor
// config c, the logger log, and the registerer prom. Every field of the
// result is taken from the arguments; the receiver's own values are not
// consulted.
func (m monitor) New(c MonConf, log blog.Logger, prom prometheus.Registerer) *monitor {
	return &monitor{
		valid:  c.Valid,
		period: c.Period.Duration,
		prober: c.getProber(),
		logger: log,
		metric: prom,
	}
}
76 changes: 76 additions & 0 deletions observer/obs_conf.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package observer

import (
"errors"
"fmt"

"github.com/letsencrypt/boulder/cmd"
blog "github.com/letsencrypt/boulder/log"
p "github.com/letsencrypt/boulder/observer/probes"
)

var (
// errNewObsNoMons is returned by Validate when the observer config
// lists no monitors.
errNewObsNoMons = errors.New("observer config is invalid, 0 monitors configured")
// errNewObsEmpty is returned by Validate when it is called on a nil
// *ObsConf.
errNewObsEmpty = errors.New("observer config is empty")
)

// ObsConf is exported to receive yaml configuration
type ObsConf struct {
// Syslog is read from the `syslog` key and passed to
// cmd.StatsAndLogging by the boulder-observer command.
Syslog cmd.SyslogConfig `yaml:"syslog"`
// DebugAddr is read from the `debugaddr` key; the boulder-observer
// command rejects an empty value and passes it to cmd.StatsAndLogging.
DebugAddr string `yaml:"debugaddr"`
// Modules is read from the `modules` key.
// NOTE(review): not referenced by any code visible here — confirm usage.
Modules []p.Configurer `yaml:"modules"`
// MonConfs is the list of monitor configs read from the `monitors` key.
MonConfs []*MonConf `yaml:"monitors"`
}

// validateMonConfs runs validate on every configured monitor and
// collects the resulting errors. The returned boolean is false when the
// number of errors equals the number of configured monitors (every
// monitor failed, or none are configured) and true otherwise.
func (n *ObsConf) validateMonConfs() ([]error, bool) {
	var failures []error
	for i := range n.MonConfs {
		if err := n.MonConfs[i].validate(); err != nil {
			failures = append(failures, err)
		}
	}

	// At least one monitor must have passed for the observer to be
	// able to continue.
	anyValid := len(failures) != len(n.MonConfs)
	return failures, anyValid
}

// Validate normalizes and validates the observer config as well as each
// monitor config. It returns errNewObsEmpty for a nil config and
// errNewObsNoMons when no monitors are configured. If no valid monitor
// configs remain after validation, Validate returns an error indicating
// that the observer cannot continue. Whenever at least one monitor
// fails validation, a summary and the reason for each failure are
// written to log at error level.
func (n *ObsConf) Validate(log blog.Logger) error {
	switch {
	case n == nil:
		return errNewObsEmpty
	case len(n.MonConfs) == 0:
		return errNewObsNoMons
	}

	errs, ok := n.validateMonConfs()
	if len(errs) == 0 {
		// every monitor passed, nothing to report
		return nil
	}

	// report each failure, whether or not any valid monitors remain
	log.Errf("%d of %d monitors failed validation", len(errs), len(n.MonConfs))
	for _, err := range errs {
		log.Errf("invalid monitor: %s", err)
	}

	// with no valid monitors left there is nothing to run
	if !ok {
		return fmt.Errorf("no valid mons, cannot continue")
	}
	return nil
}
69 changes: 69 additions & 0 deletions observer/observer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package observer

import (
"strconv"

blog "github.com/letsencrypt/boulder/log"
"github.com/letsencrypt/boulder/metrics"

// _ are probes imported to trigger init func
_ "github.com/letsencrypt/boulder/observer/probes/dns"
_ "github.com/letsencrypt/boulder/observer/probes/http"
"github.com/prometheus/client_golang/prometheus"
)

var (
// statTotalMonitors is incremented once per monitor by Observer.Start,
// labelled by prober name, prober type, and whether the monitor is
// valid.
statTotalMonitors = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "obs_monitors",
Help: "count of configured monitors",
},
[]string{"name", "type", "valid"},
)
// statObservations records the duration in seconds of each probe run by
// a monitor, labelled by prober name, prober type, and probe result.
statObservations = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "obs_observations",
Help: "time taken for a monitor to perform a request/query",
Buckets: metrics.InternetFacingBuckets,
},
[]string{"name", "type", "result"},
)
)

// Observer contains the parsed, normalized, and validated configuration
// describing a collection of monitors and the metrics to be collected
type Observer struct {
// Logger is the logger passed to New.
Logger blog.Logger
// Metric is the registerer on which Start registers the observer's
// metrics.
Metric prometheus.Registerer
// Monitors holds one monitor per configured MonConf; Start only starts
// those marked valid.
Monitors []*monitor
}

// Start registers the observer's global metrics, starts every valid
// monitor, counts each configured monitor in statTotalMonitors, and
// then blocks forever.
//
// A monitor whose prober is nil is never started and is counted with
// empty name and type labels, rather than panicking on the nil prober.
func (o Observer) Start() {
	// register metrics
	o.Metric.MustRegister(statTotalMonitors)
	o.Metric.MustRegister(statObservations)

	for _, mon := range o.Monitors {
		// Guard against a monitor without a prober (for example one
		// built from a config whose probe could not be constructed):
		// calling Name or Type on it would panic.
		var name, kind string
		if mon.prober != nil {
			name, kind = mon.prober.Name(), mon.prober.Type()
		}

		if mon.valid && mon.prober != nil {
			// TODO(@beautifulentropy): track and restart unhealthy goroutines
			// start spawns its own goroutine and returns immediately, so
			// it is called directly.
			mon.start()
		}
		statTotalMonitors.WithLabelValues(
			name, kind, strconv.FormatBool(mon.valid)).Inc()
	}

	// run forever
	select {}
}

// New creates a new Observer and its corresponding monitor objects, one
// per entry in c.MonConfs, each sharing the logger l and the registerer
// p.
func New(c ObsConf, l blog.Logger, p prometheus.Registerer) *Observer {
	var mons []*monitor
	for i := range c.MonConfs {
		mons = append(mons, monitor{}.New(*c.MonConfs[i], l, p))
	}
	return &Observer{
		Logger:   l,
		Metric:   p,
		Monitors: mons,
	}
}

0 comments on commit 013ba73

Please sign in to comment.