Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,31 @@ require (
gopkg.in/yaml.v3 v3.0.1
)

require github.com/nginx/agent/sdk/v2 v2.0.0-00010101000000-000000000000
require (
github.com/nginx/agent/sdk/v2 v2.0.0-00010101000000-000000000000
github.com/prometheus/client_golang v1.13.0
)

require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/inconshreveable/mousetrap v1.0.1 // indirect
github.com/lufia/plan9stats v0.0.0-20220517141722-cf486979b281 // indirect
github.com/magiconair/properties v1.8.6 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
github.com/nginxinc/nginx-go-crossplane v0.4.1 // indirect
github.com/pascaldekloe/name v1.0.1 // indirect
github.com/pelletier/go-toml v1.9.5 // indirect
github.com/pelletier/go-toml/v2 v2.0.2 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/power-devops/perfstat v0.0.0-20220216144756-c35f1ee13d7c // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
github.com/spf13/afero v1.9.2 // indirect
github.com/spf13/cast v1.5.0 // indirect
github.com/spf13/jwalterweatherman v1.1.0 // indirect
Expand Down
82 changes: 82 additions & 0 deletions go.sum

Large diffs are not rendered by default.

52 changes: 51 additions & 1 deletion go.work.sum

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func main() {
sdkGRPC.InitMeta(loadedConfig.ClientID, loadedConfig.CloudAccountID)

controller, commander, reporter := createGrpcClients(ctx, loadedConfig)

if controller != nil {
if err := controller.Connect(); err != nil {
log.Warnf("Unable to connect to control plane: %v", err)
Expand Down Expand Up @@ -135,15 +135,15 @@ func handleSignals(
}

// connectionUnavilable reports whether the server connection settings are
// missing from loadedConfig: it returns true when Server.Host is the empty
// string or Server.GrpcPort is zero.
func connectionUnavilable(loadedConfig *config.Config) bool {
// NOTE(review): this return statement appears twice in this view, presumably
// the before/after sides of a whitespace-only diff hunk; confirm against the
// real file that only one copy exists.
return loadedConfig.Server.Host == "" || loadedConfig.Server.GrpcPort == 0
return loadedConfig.Server.Host == "" || loadedConfig.Server.GrpcPort == 0
}

func createGrpcClients(ctx context.Context, loadedConfig *config.Config) (client.Controller, client.Commander, client.MetricReporter) {
if connectionUnavilable(loadedConfig) {
log.Infof("GRPC clients not created")
return nil, nil, nil
}

grpcDialOptions := setDialOptions(loadedConfig)
secureMetricsDialOpts, err := sdkGRPC.SecureDialOptions(
loadedConfig.TLS.Enable,
Expand Down
1 change: 1 addition & 0 deletions sdk/proto/events/event.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions src/core/config/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ var (
Path: "/var/log/nginx-agent",
},
Server: Server{
Command: "",
Metrics: "",
Command: "",
Metrics: "",
// The token needs to be validated on the server side; it can be overridden by the config value or by the CLI / environment variable.
// It is set to a random UUID for now, because the TLS connection won't work without the auth header.
Token: uuid.New().String(),
Expand Down Expand Up @@ -204,12 +204,12 @@ var (
DefaultValue: Defaults.Log.Path,
},
&StringFlag{
Name: ServerHost,
Usage: "The IP address of the server host. IPv4 addresses and hostnames are supported.",
Name: ServerHost,
Usage: "The IP address of the server host. IPv4 addresses and hostnames are supported.",
},
&IntFlag{
Name: ServerGrpcPort,
Usage: "The desired GRPC port to use for nginx-agent traffic.",
Name: ServerGrpcPort,
Usage: "The desired GRPC port to use for nginx-agent traffic.",
},
&StringFlag{
Name: ServerToken,
Expand Down
245 changes: 15 additions & 230 deletions src/core/metrics/aggregate.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,248 +79,33 @@ func GenerateMetricsReport(metricsCollections Collections) *proto.MetricsReport

func getAggregatedSimpleMetric(count int, internalMap map[string]float64) (simpleMetrics []*proto.SimpleMetric) {

// The Catalog is the source of truth for which kind of calculation should apply to each individual metric.
// TODO: retrieve this info from the Catalog or read it from a config file.
calcFn := map[string]MetricsHandler{
"system.cpu.idle": avg,
"system.cpu.iowait": avg,
"system.cpu.stolen": avg,
"system.cpu.system": avg,
"system.cpu.user": avg,
"system.disk.free": avg,
"system.disk.in_use": avg,
"system.disk.total": avg,
"system.disk.used": avg,
"system.io.iops_r": sum,
"system.io.iops_w": sum,
"system.io.kbs_r": sum,
"system.io.kbs_w": sum,
"system.io.wait_r": sum,
"system.io.wait_w": sum,
"system.mem.available": avg,
"system.mem.buffered": avg,
"system.mem.cached": avg,
"system.mem.free": avg,
"system.mem.pct_used": avg,
"system.mem.shared": avg,
"system.mem.total": avg,
"system.mem.used": avg,
"system.mem.used.all": avg,
"system.load.1": avg,
"system.load.15": avg,
"system.load.5": avg,
"system.swap.free": avg,
"system.swap.pct_free": avg,
"system.swap.total": avg,
"system.swap.used": avg,
"system.net.bytes_rcvd": sum,
"system.net.bytes_sent": sum,
"system.net.drops_in.count": sum,
"system.net.drops_out.count": sum,
"system.net.listen_overflows": sum,
"system.net.packets_in.count": sum,
"system.net.packets_in.error": sum,
"system.net.packets_out.count": sum,
"system.net.packets_out.error": sum,
"nginx.status": boolean,
"nginx.config.generation": sum,
"nginx.http.gzip.ratio": avg,
"nginx.http.status.1xx": sum,
"nginx.http.status.2xx": sum,
"nginx.http.status.3xx": sum,
"nginx.http.status.4xx": sum,
"nginx.http.status.5xx": sum,
"nginx.http.status.403": sum,
"nginx.http.status.404": sum,
"nginx.http.status.500": sum,
"nginx.http.status.502": sum,
"nginx.http.status.503": sum,
"nginx.http.status.504": sum,
"nginx.http.status.discarded": sum,
"nginx.http.method.delete": sum,
"nginx.http.method.get": sum,
"nginx.http.method.head": sum,
"nginx.http.method.options": sum,
"nginx.http.method.post": sum,
"nginx.http.method.put": sum,
"nginx.http.method.others": sum,
"nginx.http.request.bytes_sent": sum,
"nginx.http.request.body_bytes_sent": sum,
"nginx.http.request.length": avg,
"nginx.http.request.malformed": sum,
"nginx.http.request.time": avg,
"nginx.http.request.time.count": sum,
"nginx.http.request.time.max": avg,
"nginx.http.request.time.median": avg,
"nginx.http.request.time.pctl95": avg,
"nginx.http.request.count": sum,
"nginx.http.request.current": avg,
"nginx.http.request.buffered": sum,
"nginx.http.v0_9": sum,
"nginx.http.v1_0": sum,
"nginx.http.v1_1": sum,
"nginx.http.v2": sum,
"nginx.http.conn.handled": sum,
"nginx.http.conn.reading": avg,
"nginx.http.conn.writing": avg,
"nginx.http.conn.accepted": sum,
"nginx.http.conn.active": avg,
"nginx.http.conn.current": avg,
"nginx.http.conn.dropped": sum,
"nginx.http.conn.idle": avg,
"nginx.upstream.response.buffered": sum,
"nginx.upstream.request.failed": sum,
"nginx.upstream.response.failed": sum,
"nginx.workers.count": avg,
"nginx.workers.rlimit_nofile": avg,
"nginx.workers.cpu.user": sum,
"nginx.workers.cpu.system": sum,
"nginx.workers.cpu.total": sum,
"nginx.workers.fds_count": avg,
"nginx.workers.mem.vms": sum,
"nginx.workers.mem.rss": sum,
"nginx.workers.mem.rss_pct": avg,
"nginx.workers.io.kbs_r": sum,
"nginx.workers.io.kbs_w": sum,
"plus.http.limit_conns.passed": sum,
"plus.http.limit_conns.rejected": sum,
"plus.http.limit_conns.rejected_dry_run": sum,
"plus.http.limit_reqs.passed": sum,
"plus.http.limit_reqs.delayed": sum,
"plus.http.limit_reqs.rejected": sum,
"plus.http.limit_reqs.delayed_dry_run": sum,
"plus.http.limit_reqs.rejected_dry_run": sum,
"plus.cache.bypass.responses": sum,
"plus.cache.bypass.bytes": sum,
"plus.cache.expired.responses": sum,
"plus.cache.expired.bytes": sum,
"plus.cache.hit.responses": sum,
"plus.cache.hit.bytes": sum,
"plus.cache.miss.responses": sum,
"plus.cache.miss.bytes": sum,
"plus.cache.revalidated.responses": sum,
"plus.cache.revalidated.bytes": sum,
"plus.cache.size": avg,
"plus.cache.max_size": avg,
"plus.cache.stale.responses": sum,
"plus.cache.stale.bytes": sum,
"plus.cache.updating.responses": sum,
"plus.cache.updating.bytes": sum,
"plus.http.request.bytes_rcvd": sum,
"plus.http.request.bytes_sent": sum,
"plus.http.request.count": sum,
"plus.http.response.count": sum,
"plus.ssl.failed": sum,
"plus.ssl.handshakes": sum,
"plus.ssl.reuses": sum,
"plus.http.status.1xx": sum,
"plus.http.status.2xx": sum,
"plus.http.status.3xx": sum,
"plus.http.status.4xx": sum,
"plus.http.status.5xx": sum,
"plus.http.status.discarded": sum,
"plus.http.status.processing": avg,
"plus.stream.bytes_rcvd": sum,
"plus.stream.bytes_sent": sum,
"plus.stream.connections": sum,
"plus.stream.processing": avg,
"plus.stream.discarded": sum,
"plus.stream.status.2xx": sum,
"plus.stream.status.4xx": sum,
"plus.stream.status.5xx": sum,
"plus.stream.status.total": sum,
"plus.http.upstream.zombies": avg,
"plus.http.upstream.keepalives": avg,
"plus.http.upstream.queue.maxsize": avg,
"plus.http.upstream.queue.overflows": sum,
"plus.http.upstream.queue.size": avg,
"plus.http.upstream.peers.conn.active": avg,
"plus.http.upstream.peers.header_time": avg,
"plus.http.upstream.peers.response.time": avg,
"plus.http.upstream.peers.request.count": sum,
"plus.http.upstream.peers.response.count": sum,
"plus.http.upstream.peers.status.1xx": sum,
"plus.http.upstream.peers.status.2xx": sum,
"plus.http.upstream.peers.status.3xx": sum,
"plus.http.upstream.peers.status.4xx": sum,
"plus.http.upstream.peers.status.5xx": sum,
"plus.http.upstream.peers.bytes_sent": sum,
"plus.http.upstream.peers.bytes_rcvd": sum,
"plus.http.upstream.peers.fails": sum,
"plus.http.upstream.peers.unavail": sum,
"plus.http.upstream.peers.health_checks.fails": sum,
"plus.http.upstream.peers.health_checks.unhealthy": sum,
"plus.http.upstream.peers.health_checks.checks": sum,
"plus.http.upstream.peers.state.up": avg,
"plus.http.upstream.peers.state.draining": avg,
"plus.http.upstream.peers.state.down": avg,
"plus.http.upstream.peers.state.unavail": avg,
"plus.http.upstream.peers.state.checking": avg,
"plus.http.upstream.peers.state.unhealthy": avg,
"plus.http.upstream.peers.total.up": avg,
"plus.http.upstream.peers.total.draining": avg,
"plus.http.upstream.peers.total.down": avg,
"plus.http.upstream.peers.total.unavail": avg,
"plus.http.upstream.peers.total.checking": avg,
"plus.http.upstream.peers.total.unhealthy": avg,
"plus.stream.upstream.zombies": avg,
"plus.stream.upstream.peers.conn.active": avg,
"plus.stream.upstream.peers.conn.count": sum,
"plus.stream.upstream.peers.connect_time": avg,
"plus.stream.upstream.peers.ttfb": avg,
"plus.stream.upstream.peers.response.time": avg,
"plus.stream.upstream.peers.bytes_sent": sum,
"plus.stream.upstream.peers.bytes_rcvd": sum,
"plus.stream.upstream.peers.fails": sum,
"plus.stream.upstream.peers.unavail": sum,
"plus.stream.upstream.peers.health_checks.fails": sum,
"plus.stream.upstream.peers.health_checks.unhealthy": sum,
"plus.stream.upstream.peers.health_checks.checks": sum,
"plus.stream.upstream.peers.state.up": avg,
"plus.stream.upstream.peers.state.draining": avg,
"plus.stream.upstream.peers.state.down": avg,
"plus.stream.upstream.peers.state.unavail": avg,
"plus.stream.upstream.peers.state.checking": avg,
"plus.stream.upstream.peers.state.unhealthy": avg,
"plus.stream.upstream.peers.total.up": avg,
"plus.stream.upstream.peers.total.draining": avg,
"plus.stream.upstream.peers.total.down": avg,
"plus.stream.upstream.peers.total.unavail": avg,
"plus.stream.upstream.peers.total.checking": avg,
"plus.stream.upstream.peers.total.unhealthy": avg,
"plus.slab.pages.used": avg,
"plus.slab.pages.free": avg,
"plus.slab.pages.total": avg,
"plus.slab.pages.pct_used": avg,
"plus.instance.count": avg,
"container.cpu.cores": avg,
"container.cpu.period": avg,
"container.cpu.quota": avg,
"container.cpu.shares": avg,
"container.cpu.set.cores": avg,
"container.cpu.throttling.time": avg,
"container.cpu.throttling.throttled": avg,
"container.cpu.throttling.periods": avg,
"container.cpu.throttling.percent": avg,
"container.mem.oom": avg,
"container.mem.oom.kill": avg,
}

variableMetrics := map[*regexp.Regexp]MetricsHandler{
regexp.MustCompile(`slab.slots.*.fails`): sum,
regexp.MustCompile(`slab.slots.*.free`): avg,
regexp.MustCompile(`slab.slots.*.reqs`): sum,
regexp.MustCompile(`slab.slots.*.used`): avg,
}

calMap := GetCalculationMap()

for name, value := range internalMap {
if calculation, ok := calcFn[name]; ok {
aggegatedValue := calculation(value, count)
if valueType, ok := calMap[name]; ok {
var aggregatedValue float64
switch valueType {
case "sum":
aggregatedValue = sum(value, count)

case "avg":
aggregatedValue = avg(value, count)

case "boolean":
aggregatedValue = boolean(value, count)
}

// Only aggregate metrics when the aggregation method is defined
simpleMetrics = append(simpleMetrics, &proto.SimpleMetric{
Name: name,
Value: aggegatedValue,
Value: aggregatedValue,
})
} else {
for reg, calculation := range variableMetrics {
Expand Down
Loading