/
cmd.go
127 lines (107 loc) · 3.35 KB
/
cmd.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
package cmd
import (
"context"
"net/http"
"os"
"strconv"
"time"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
"github.com/metal-toolbox/audito-maldito/internal/health"
"github.com/metal-toolbox/audito-maldito/internal/metrics"
)
// usage is the leading part of the command's help text: the program name, a
// DESCRIPTION section, and the OPTIONS heading. The option list itself is not
// part of this string; presumably it is appended where the flags are defined
// (not visible here).
const usage = `audito-maldito
DESCRIPTION
audito-maldito is a daemon that monitors OpenSSH server logins and
produces structured audit events describing what authenticated users
did while logged in (e.g., what programs they executed).
OPTIONS
`
// logger is the package-level logger used by handleMetricsAndHealth and
// handleAuditLogMetrics. It is declared without an initializer, so it is nil
// until it is assigned elsewhere; it must be set before those functions run.
var logger *zap.SugaredLogger
// Default values for the metrics/health HTTP server and the audit log check.
const (
// DefaultHTTPServerReadTimeout is the default HTTP server read timeout.
// NOTE(review): http.Server.ReadTimeout covers the whole request, headers
// included, so with these defaults the 5s header timeout below can never be
// reached before this 1s limit — confirm that this is intended.
DefaultHTTPServerReadTimeout = 1 * time.Second
// DefaultHTTPServerReadHeaderTimeout is the default HTTP server read header timeout.
DefaultHTTPServerReadHeaderTimeout = 5 * time.Second
// DefaultAuditCheckInterval is the default interval between checks of the
// audit.log modification time.
DefaultAuditCheckInterval = 15 * time.Second
// DefaultAuditModifyTimeThreshold is the default number of seconds that may
// pass since the last write to audit.log before alerting (86400 s = 24 h).
// Unlike the values above it is an untyped number of seconds, not a
// time.Duration.
DefaultAuditModifyTimeThreshold = 86400
)
// metricsConfig holds the settings consumed by handleMetricsAndHealth and
// handleAuditLogMetrics.
type metricsConfig struct {
// enableMetrics registers the /metrics endpoint when true.
enableMetrics bool
// enableHealthz registers the /readyz endpoint when true.
enableHealthz bool
// enableAuditMetrics turns on the periodic audit log check; when false,
// handleAuditLogMetrics does nothing.
enableAuditMetrics bool
// httpServerReadTimeout is used as http.Server.ReadTimeout.
httpServerReadTimeout time.Duration
// httpServerReadHeaderTimeout is used as http.Server.ReadHeaderTimeout.
httpServerReadHeaderTimeout time.Duration
// auditMetricsSecondsInterval is the period of the audit log check ticker.
// Despite the name it is a time.Duration, not a bare number of seconds.
auditMetricsSecondsInterval time.Duration
// auditLogWriteTimeSecondThreshold is the maximum age of the audit log's
// modification time, in seconds, before the check is reported as failing.
auditLogWriteTimeSecondThreshold int
}
// handleMetricsAndHealth starts a HTTP server on port 2112 to serve metrics
// and health endpoints, and arranges for it to be shut down when ctx is done.
//
// If mc.enableMetrics is set, /metrics is registered on http.DefaultServeMux.
// If mc.enableHealthz is set, /readyz is registered on http.DefaultServeMux.
// An endpoint that is not registered is answered with 404 unless some other
// handler on http.DefaultServeMux matches it.
// If both are disabled, nothing is registered and no server is started.
//
// Two goroutines are added to eg: one runs the server, the other waits for
// ctx to be done and then shuts the server down gracefully. A clean shutdown
// is not reported as an error to eg.
func handleMetricsAndHealth(ctx context.Context, mc metricsConfig, eg *errgroup.Group, h *health.Health) {
	// shutdownTimeout bounds how long in-flight requests may take to finish
	// once shutdown has been requested.
	const shutdownTimeout = 5 * time.Second

	if !mc.enableMetrics && !mc.enableHealthz {
		return
	}

	server := &http.Server{
		Addr:              ":2112",
		ReadTimeout:       mc.httpServerReadTimeout,
		ReadHeaderTimeout: mc.httpServerReadHeaderTimeout,
	}

	if mc.enableMetrics {
		http.Handle("/metrics", promhttp.Handler())
	}

	if mc.enableHealthz {
		http.Handle("/readyz", h.ReadyzHandler())
		// TODO: Add livez endpoint
	}

	eg.Go(func() error {
		logger.Infof("starting HTTP server on address '%s'...", server.Addr)

		// ListenAndServe always returns a non-nil error; after Shutdown it is
		// the http.ErrServerClosed sentinel, which signals a normal stop and
		// must not be reported as a failure.
		if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			return err
		}

		return nil
	})

	eg.Go(func() error {
		<-ctx.Done()
		logger.Infoln("stopping HTTP server...")

		// ctx is already cancelled at this point. Handing it to Shutdown would
		// make Shutdown return ctx.Err() without waiting for active
		// connections, so a fresh, bounded context is used instead.
		shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
		defer cancel()

		return server.Shutdown(shutdownCtx)
	})
}
// handleAuditLogMetrics periodically checks the modification time of
// /var/log/audit/audit.log and publishes the result through pprov.
//
// It does nothing when mc.enableAuditMetrics is false. Otherwise it adds one
// goroutine to eg which, every mc.auditMetricsSecondsInterval:
//   - stats the audit log; a failure is logged and that tick is skipped,
//     leaving the previously published values in place;
//   - calls pprov.SetAuditLogCheck with 0 when the file was last modified more
//     than mc.auditLogWriteTimeSecondThreshold seconds ago and with 1
//     otherwise, passing the threshold (as a decimal string) as second argument;
//   - calls pprov.SetAuditLogModifyTime with the modification time as Unix
//     seconds.
//
// The goroutine returns ctx.Err() once ctx is done.
func handleAuditLogMetrics(
	ctx context.Context,
	mc metricsConfig,
	eg *errgroup.Group,
	pprov *metrics.PrometheusMetricsProvider,
) {
	if !mc.enableAuditMetrics {
		return
	}

	const auditLogFilePath = "/var/log/audit/audit.log"

	// Both values are constant for the lifetime of the goroutine, so they are
	// computed once instead of on every tick.
	thresholdSeconds := float64(mc.auditLogWriteTimeSecondThreshold)
	thresholdLabel := strconv.Itoa(mc.auditLogWriteTimeSecondThreshold)

	eg.Go(func() error {
		ticker := time.NewTicker(mc.auditMetricsSecondsInterval)
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C:
				info, err := os.Stat(auditLogFilePath)
				if err != nil {
					// Include the cause: "not found" and "permission denied"
					// call for very different reactions from an operator.
					logger.Errorf("error stat-ing %s: %v", auditLogFilePath, err)
					continue
				}

				modTime := info.ModTime()

				if time.Since(modTime).Seconds() > thresholdSeconds {
					pprov.SetAuditLogCheck(0, thresholdLabel)
				} else {
					pprov.SetAuditLogCheck(1, thresholdLabel)
				}

				pprov.SetAuditLogModifyTime(float64(modTime.Unix()))
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	})
}