Skip to content

Commit

Permalink
Squashing Fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
beautifulentropy committed Mar 7, 2021
1 parent b2688b1 commit f6575ed
Show file tree
Hide file tree
Showing 22 changed files with 813 additions and 580 deletions.
147 changes: 94 additions & 53 deletions cmd/boulder-observer/README.md
@@ -1,86 +1,127 @@
# boulder-observer
A modular config driven approach to black box monitoring with Prometheus
A modular, config-driven approach to black-box monitoring with
Prometheus.

## Metrics
Observer provides the following metrics.

### obs_monitors
Count of configured monitors.

**Labels:**

`name`: name of the monitor

`type`: type of prober the monitor is configured to use

`valid`: whether the monitor configuration was valid

### obs_observations
Time taken, in seconds, for a monitor to perform a request/query.

**Labels:**

`name`: name of the monitor

`type`: type of prober the monitor is configured to use

`result`: whether the query/request was successful

**Buckets:**

`.1, .25, .5, 1, 2.5, 5, 7.5, 10, 15, 30, 45`

## Usage
### Starting the `observer` daemon
```shell
$ ./observer/plugins/build.sh && go run ./cmd/boulder-observer/main.go -config test/config-next/observer.yaml
Building plugins:
⚙️ observer/plugins/dns.so
✅dns.so
⚙️ observer/plugins/http.so
✅http.so
OK
I191418 main ksKu7w4 Versions: main=(Unspecified Unspecified) Golang=(go1.15.7) BuildHost=(Unspecified)
I191418 main o9me0QI Initializing boulder-observer daemon from config: test/config-next/observer.yaml
I191420 main wv7tug0 HTTP monitor "https://letsencrypt.org-200" succeeded while taking:=120.900665ms
I191422 main ss-hzQ8 HTTP monitor "https://letsencrypt.org-200" succeeded while taking:=23.051998ms
I191424 main -fD46gg HTTP monitor "https://letsencrypt.org-200" succeeded while taking:=23.419121ms
I191426 main urmy8AM HTTP monitor "https://letsencrypt.org-200" succeeded while taking:=23.875478ms
I191428 main qaGe0Qc DNS monitor "udp-8.8.8.8:53-google.com-A" succeeded while taking:=5.088261ms
I191428 main i677rw0 DNS monitor "tcp-8.8.8.8:53-google.com-A" succeeded while taking:=5.156114ms
I191428 main ooyq_Qo DNS monitor "udp-owen.ns.cloudflare.com:53-letsencrypt.org-A" succeeded while taking:=15.858563ms
```

### Help
```shell
$ go run ./cmd/boulder-observer/main.go -help
main:
-config string
Path to boulder-observer configuration file (default "config.yaml")
Path to boulder-observer configuration file (default "config.yml")
```

### Starting the boulder-observer daemon
```shell
$ go run ./cmd/boulder-observer/main.go -config test/config-next/observer.yml
I142601 main ksKu7w4 Versions: main=(Unspecified Unspecified) Golang=(go1.15.7) BuildHost=(Unspecified)
I142601 main q_D84gk Initializing boulder-observer daemon from config: test/config-next/observer.yml
I142603 main o4Cp-Q0 type=[HTTP] result=[true] duration=[0.123472] name=[http://letsencrypt.org-200]
I142603 main n4iSrAM type=[HTTP] result=[true] duration=[0.123751] name=[https://letsencrypt.org-200]
I142605 main qe3Gugc type=[HTTP] result=[true] duration=[0.023499] name=[https://letsencrypt.org-200]
I142605 main _J2k0wo type=[HTTP] result=[true] duration=[0.044429] name=[http://letsencrypt.org-200]
I142606 main zomKjwc type=[DNS] result=[false] duration=[0.000017] name=[udp-2606:4700:4700::1111:53-google.com-A]
I142606 main 6parpwM type=[DNS] result=[false] duration=[0.000014] name=[tcp-2606:4700:4700::1111:53-google.com-A]
I142606 main pJqFmAs type=[DNS] result=[true] duration=[0.004667] name=[udp-1.1.1.1:53-google.com-A]
I142606 main 9f7d2AM type=[DNS] result=[true] duration=[0.008965] name=[tcp-1.1.1.1:53-google.com-A]
I142606 main 962rkgM type=[DNS] result=[true] duration=[0.013107] name=[udp-owen.ns.cloudflare.com:53-letsencrypt.org-A]
I142606 main l-r29gc type=[DNS] result=[true] duration=[0.016294] name=[tcp-owen.ns.cloudflare.com:53-letsencrypt.org-A]
I142607 main t_vrtAQ type=[HTTP] result=[true] duration=[0.022378] name=[https://letsencrypt.org-200]
I142607 main v7SjtQM type=[HTTP] result=[true] duration=[0.043780] name=[http://letsencrypt.org-200]
I142609 main ptjWkQM type=[HTTP] result=[true] duration=[0.021068] name=[https://letsencrypt.org-200]
I142609 main jPzToww type=[HTTP] result=[true] duration=[0.042141] name=[http://letsencrypt.org-200]
I142611 main 5IygqAI type=[DNS] result=[false] duration=[0.000019] name=[udp-2606:4700:4700::1111:53-google.com-A]
I142611 main zqe61Qk type=[DNS] result=[false] duration=[0.000012] name=[tcp-2606:4700:4700::1111:53-google.com-A]
I142611 main k9Xh1AU type=[DNS] result=[true] duration=[0.008134] name=[udp-8.8.8.8:53-google.com-A]
I142611 main trL2mwU type=[DNS] result=[true] duration=[0.008801] name=[udp-1.1.1.1:53-google.com-A]
I142611 main _qLDgwk type=[DNS] result=[true] duration=[0.011323] name=[tcp-8.8.8.8:53-google.com-A]
I142611 main rJDj2AI type=[DNS] result=[true] duration=[0.012559] name=[tcp-1.1.1.1:53-google.com-A]
I142611 main teWD6Qs type=[DNS] result=[true] duration=[0.015299] name=[udp-owen.ns.cloudflare.com:53-letsencrypt.org-A]
I142611 main kPrnlg4 type=[DNS] result=[true] duration=[0.019022] name=[tcp-owen.ns.cloudflare.com:53-letsencrypt.org-A]
I142611 main xb_w9gs type=[HTTP] result=[true] duration=[0.025506] name=[https://letsencrypt.org-200]
I142611 main oKi2ggk type=[HTTP] result=[true] duration=[0.074734] name=[http://letsencrypt.org-200]
I142613 main wPqP-gg type=[HTTP] result=[true] duration=[0.021814] name=[https://letsencrypt.org-200]
I142613 main 4IrYoQY type=[HTTP] result=[true] duration=[0.041857] name=[http://letsencrypt.org-200]
```

## Configuration

### Observer
```yaml
debugAddr: 8040
syslog:
debugAddr: :8040
syslog:
stdoutlevel: 6
sysloglevel: 6
timeout: 5
monitors: []
monitors:
-
...
```

### Monitors

#### Using the DNS plugin
#### Configuring a DNS monitor
```yaml
monitors:
-
enabled: true
period: 1
plugin:
name: DNS
path: "./cmd/boulder-observer/observer/plugins/dns.so"
period: 10s
type: DNS
settings:
qproto: udp
qrecurse: false
qname: letsencrypt.org
qtype: A
qserver: "owen.ns.cloudflare.com:53"
protocol: tcp
server: 8.8.8.8:53
recurse: true
query_name: google.com
query_type: A
```

#### Using the HTTP plugin
#### Configuring an HTTP monitor
```yaml
monitors:
-
enabled: true
period: 1
plugin:
name: HTTP
path: "./cmd/boulder-observer/observer/plugins/http.so"
period: 2s
type: HTTP
settings:
url: https://letsencrypt.org
rcode: 200
```

### Plugins
**Building plugins**
## Development

### Starting Prometheus locally
Please note, this requires a local Prometheus binary.
```shell
$ ./observer/plugins/build.sh
Building plugins:
⚙️ observer/plugins/dns.so
✅dns.so
⚙️ observer/plugins/http.so
✅http.so
OK
```
prometheus --config.file=boulder/test/prometheus/prometheus.yml
```

### Viewing metrics locally
When developing with a local Prometheus instance, you can use this link
to view metrics:
[link](http://0.0.0.0:9090/graph?g0.expr=sum%20by(name)%20(%0Arate(obs_observations_bucket%7Bresult%3D%22true%22%7D%5B1m%5D)%0A)&g0.tab=0&g0.stacked=0&g0.range_input=1h&g1.expr=sum%20by(name)%20(%0Arate(obs_observations_bucket%7Bresult%3D%22false%22%7D%5B1m%5D)%0A)&g1.tab=0&g1.stacked=0&g1.range_input=1h&g2.expr=count%20by(valid)%20(%0Aobs_monitors%7Bvalid%3D%22true%22%7D%0A)&g2.tab=0&g2.stacked=0&g2.range_input=1h)
15 changes: 10 additions & 5 deletions cmd/boulder-observer/main.go
@@ -1,6 +1,7 @@
package main

import (
"errors"
"flag"
"io/ioutil"

Expand All @@ -21,20 +22,24 @@ func main() {
var config observer.ObsConf
err = yaml.Unmarshal(configYAML, &config)
if err != nil {
cmd.FailOnError(err, "failed to parse YAML config")
cmd.FailOnError(err, "failed to parse yaml config")
}

// validate config
err = config.Validate()
if err != nil {
cmd.FailOnError(err, "YAML config failed validation")
if config.DebugAddr == "" {
cmd.FailOnError(errors.New(""), "debugaddr is not defined")
}

// start monitoring and logging
prom, logger := cmd.StatsAndLogging(config.Syslog, config.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())

// validate config
err = config.Validate(logger)
if err != nil {
cmd.FailOnError(err, "config failed validation")
}

// start daemon
logger.Infof("Initializing boulder-observer daemon from config: %s", *configPath)
logger.Debugf("Using config: %+v", config)
Expand Down
58 changes: 37 additions & 21 deletions observer/mon_conf.go
@@ -1,44 +1,60 @@
package observer

import (
"errors"
"fmt"
"strings"

"github.com/letsencrypt/boulder/observer/plugins"
"github.com/letsencrypt/boulder/cmd"
p "github.com/letsencrypt/boulder/observer/probes"
"gopkg.in/yaml.v2"
)

var (
errNewMonEmpty = errors.New("monitor config is empty")
errNewMonInvalid = errors.New("monitor config is invalid")
)
type settings map[string]interface{}

// MonConf is exported to receive the supplied monitor config
// MonConf is exported to receive yaml configuration
type MonConf struct {
Enabled bool `yaml:"enabled"`
Period int `yaml:"period"`
Timeout int `yaml:"timeout"`
Plugin plugins.Info `yaml:"plugin"`
Settings map[string]interface{} `yaml:"settings"`
Valid bool
Period cmd.ConfigDuration `yaml:"period"`
Timeout int `yaml:"timeout"`
Kind string `yaml:"type"`
Settings settings `yaml:"settings"`
}

func (c MonConf) normalize() {
c.Plugin.Name = strings.ToLower(c.Plugin.Name)
c.Plugin.Path = strings.ToLower(c.Plugin.Path)
c.Kind = strings.ToLower(c.Kind)
}

// unmashalProbeSettings obtains a probe configurer from p.GetProbeConf
// for c.Kind and c.Settings, then populates it from the monitor's
// settings. The settings map is re-marshaled to YAML so that the
// configurer's UnmarshalSettings can decode it itself.
//
// It returns the populated configurer, or a nil configurer and the
// first error encountered. The misspelled name is kept because other
// MonConf methods call it.
func (c MonConf) unmashalProbeSettings() (p.Configurer, error) {
	probeConf, err := p.GetProbeConf(c.Kind, c.Settings)
	if err != nil {
		return nil, err
	}
	// Surface marshaling failures instead of handing possibly empty
	// bytes to UnmarshalSettings.
	s, err := yaml.Marshal(c.Settings)
	if err != nil {
		return nil, fmt.Errorf(
			"marshaling settings for %q prober: %w", c.Kind, err)
	}
	probeConf, err = probeConf.UnmarshalSettings(s)
	if err != nil {
		return nil, err
	}
	return probeConf, nil
}

// validate normalizes and validates the received monitor config
func (c MonConf) validate() error {
func (c *MonConf) validate() error {
c.normalize()
pluginConf, err := plugins.GetPluginConf(c.Settings, c.Plugin.Path, c.Plugin.Name)
probeConf, err := c.unmashalProbeSettings()
if err != nil {
if err != nil {
return fmt.Errorf("failed to get plugin: %w", err)
}
return err
}
err = pluginConf.Validate()
err = probeConf.Validate()
if err != nil {
return fmt.Errorf("failed to validate plugin settings: %w", err)
return fmt.Errorf(
"failed to validate: %s prober with settings: %+v due to: %w",
c.Kind, probeConf, err)
}
c.Valid = true
return nil
}

// getProber builds the probe configurer for this monitor config via
// unmashalProbeSettings and returns the result of its AsProbe method.
//
// The error from unmashalProbeSettings is discarded. On failure that
// helper returns a nil configurer, so the AsProbe call below would
// panic. NOTE(review): presumably this is only called after validate
// has succeeded for the same config; confirm against the call sites.
func (c MonConf) getProber() p.Prober {
probeConf, _ := c.unmashalProbeSettings()
return probeConf.AsProbe()
}
54 changes: 22 additions & 32 deletions observer/monitor.go
@@ -1,54 +1,44 @@
package observer

import (
"strconv"
"time"

blog "github.com/letsencrypt/boulder/log"
"github.com/letsencrypt/boulder/observer/plugins"
p "github.com/letsencrypt/boulder/observer/probes"
"github.com/prometheus/client_golang/prometheus"
)

// monitor contains the parsed, normalized, and validated configuration
// describing a given observer monitor
type monitor struct {
name string
period time.Duration
timeout time.Duration
pluginIs string
probe plugins.Plugin
logger blog.Logger
metric prometheus.Registerer
valid bool
period time.Duration
prober p.Prober
logger blog.Logger
metric prometheus.Registerer
}

// start creates a ticker channel then spins off a prober goroutine for
// each period specified in the monitor config and a timeout inferred
// from that period. This is not perfect, it means that the effective
// deadline for a prober goroutine will be TTL + time-to-schedule, but
// it's close enough for our purposes
func (m monitor) start() *time.Ticker {
ticker := time.NewTicker(m.period)
go func() {
for {
select {
case tick := <-ticker.C:
success, took := m.probe.Do(tick, m.timeout)
statTotalObservations.WithLabelValues(m.pluginIs, m.name).Add(1)
if !success {
statTotalErrors.WithLabelValues(m.pluginIs, m.name).Add(1)
m.logger.Infof("%s monitor %q failed while taking:=%s", m.pluginIs, m.name, took.String())
return
}
m.logger.Infof("%s monitor %q succeeded while taking:=%s", m.pluginIs, m.name, took.String())
case <-ticker.C:
result, dur := m.prober.Do(m.period)
statObservations.WithLabelValues(
m.prober.Name(), m.prober.Type(), strconv.FormatBool(result)).
Observe(dur.Seconds())
m.logger.Infof(
"type=[%s] result=[%v] duration=[%f] name=[%s]",
m.prober.Type(), result, dur.Seconds(), m.prober.Name())
}
}
}()
return ticker
}

// New fills in a copy of the receiver from the monitor config c and
// returns a pointer to that copy. When c.Timeout is zero, t is used as
// the timeout instead. Period and timeout are multiplied by 1000000000
// before conversion to time.Duration, i.e. they are taken as whole
// seconds. The error from plugins.GetPluginConf is discarded.
func (m monitor) New(c MonConf, log blog.Logger, prom prometheus.Registerer, t int) *monitor {
	// fall back to the supplied default when the monitor sets no timeout
	timeoutSecs := c.Timeout
	if timeoutSecs == 0 {
		timeoutSecs = t
	}

	pluginConf, _ := plugins.GetPluginConf(c.Settings, c.Plugin.Path, c.Plugin.Name)

	m.logger, m.metric = log, prom
	m.pluginIs = c.Plugin.Name
	m.name = pluginConf.GetMonitorName()
	m.probe = pluginConf.AsProbe()
	m.period = time.Duration(c.Period * 1000000000)
	m.timeout = time.Duration(timeoutSecs * 1000000000)
	return &m
}

0 comments on commit f6575ed

Please sign in to comment.