Skip to content

Commit

Permalink
recon: fix replication age metric
Browse files Browse the repository at this point in the history
  • Loading branch information
talal committed Jan 29, 2020
1 parent abab40d commit 1531679
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 17 deletions.
6 changes: 3 additions & 3 deletions collector/fixtures/recon_failed_collect.prom
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
swift_cluster_accounts_quarantined{storage_ip="10.0.0.1"} 0
# HELP swift_cluster_accounts_replication_age Account replication age reported by the swift-recon tool.
# TYPE swift_cluster_accounts_replication_age gauge
swift_cluster_accounts_replication_age{storage_ip="10.0.0.1"} 1.579007237099724e+09
swift_cluster_accounts_replication_age{storage_ip="10.0.0.1"} -1.579007236099724e+09
# HELP swift_cluster_accounts_replication_duration Account replication duration reported by the swift-recon tool.
# TYPE swift_cluster_accounts_replication_duration gauge
swift_cluster_accounts_replication_duration{storage_ip="10.0.0.1"} 23.422847032546997
Expand All @@ -12,7 +12,7 @@ swift_cluster_accounts_replication_duration{storage_ip="10.0.0.1"} 23.4228470325
swift_cluster_containers_quarantined{storage_ip="10.0.0.1"} 0
# HELP swift_cluster_containers_replication_age Container replication age reported by the swift-recon tool.
# TYPE swift_cluster_containers_replication_age gauge
swift_cluster_containers_replication_age{storage_ip="10.0.0.1"} 1.579007236617117e+09
swift_cluster_containers_replication_age{storage_ip="10.0.0.1"} -1.579007235617117e+09
# HELP swift_cluster_containers_replication_duration Container replication duration reported by the swift-recon tool.
# TYPE swift_cluster_containers_replication_duration gauge
swift_cluster_containers_replication_duration{storage_ip="10.0.0.1"} 98.37576985359192
Expand All @@ -30,7 +30,7 @@ swift_cluster_drives_unmounted{storage_ip="10.0.0.1"} 0
swift_cluster_objects_quarantined{storage_ip="10.0.0.1"} 0
# HELP swift_cluster_objects_replication_age Object replication age reported by the swift-recon tool.
# TYPE swift_cluster_objects_replication_age gauge
swift_cluster_objects_replication_age{storage_ip="10.0.0.1"} 1.57900646181673e+09
swift_cluster_objects_replication_age{storage_ip="10.0.0.1"} -1.57900646081673e+09
# HELP swift_cluster_objects_replication_duration Object replication duration reported by the swift-recon tool.
# TYPE swift_cluster_objects_replication_duration gauge
swift_cluster_objects_replication_duration{storage_ip="10.0.0.1"} 5.449508202075958
Expand Down
12 changes: 6 additions & 6 deletions collector/fixtures/recon_successful_collect.prom
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ swift_cluster_accounts_quarantined{storage_ip="10.0.0.1"} 0
swift_cluster_accounts_quarantined{storage_ip="10.0.0.2"} 0
# HELP swift_cluster_accounts_replication_age Account replication age reported by the swift-recon tool.
# TYPE swift_cluster_accounts_replication_age gauge
swift_cluster_accounts_replication_age{storage_ip="10.0.0.1"} 1.577664676578959e+09
swift_cluster_accounts_replication_age{storage_ip="10.0.0.2"} 1.5776646689851e+09
swift_cluster_accounts_replication_age{storage_ip="10.0.0.1"} -1.577664675578959e+09
swift_cluster_accounts_replication_age{storage_ip="10.0.0.2"} -1.5776646679851e+09
# HELP swift_cluster_accounts_replication_duration Account replication duration reported by the swift-recon tool.
# TYPE swift_cluster_accounts_replication_duration gauge
swift_cluster_accounts_replication_duration{storage_ip="10.0.0.1"} 13.002140045166016
Expand All @@ -16,8 +16,8 @@ swift_cluster_containers_quarantined{storage_ip="10.0.0.1"} 0
swift_cluster_containers_quarantined{storage_ip="10.0.0.2"} 0
# HELP swift_cluster_containers_replication_age Container replication age reported by the swift-recon tool.
# TYPE swift_cluster_containers_replication_age gauge
swift_cluster_containers_replication_age{storage_ip="10.0.0.1"} 1.577664528691438e+09
swift_cluster_containers_replication_age{storage_ip="10.0.0.2"} 1.577664555743305e+09
swift_cluster_containers_replication_age{storage_ip="10.0.0.1"} -1.577664527691438e+09
swift_cluster_containers_replication_age{storage_ip="10.0.0.2"} -1.577664554743305e+09
# HELP swift_cluster_containers_replication_duration Container replication duration reported by the swift-recon tool.
# TYPE swift_cluster_containers_replication_duration gauge
swift_cluster_containers_replication_duration{storage_ip="10.0.0.1"} 83.79213690757751
Expand Down Expand Up @@ -56,8 +56,8 @@ swift_cluster_objects_quarantined{storage_ip="10.0.0.1"} 0
swift_cluster_objects_quarantined{storage_ip="10.0.0.2"} 0
# HELP swift_cluster_objects_replication_age Object replication age reported by the swift-recon tool.
# TYPE swift_cluster_objects_replication_age gauge
swift_cluster_objects_replication_age{storage_ip="10.0.0.1"} 1.577664310620143e+09
swift_cluster_objects_replication_age{storage_ip="10.0.0.2"} 1.577664316719913e+09
swift_cluster_objects_replication_age{storage_ip="10.0.0.1"} -1.577664309620143e+09
swift_cluster_objects_replication_age{storage_ip="10.0.0.2"} -1.577664315719913e+09
# HELP swift_cluster_objects_replication_duration Object replication duration reported by the swift-recon tool.
# TYPE swift_cluster_objects_replication_duration gauge
swift_cluster_objects_replication_duration{storage_ip="10.0.0.1"} 4.6007425824801125
Expand Down
21 changes: 16 additions & 5 deletions collector/recon.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,18 @@ import (

"github.com/prometheus/client_golang/prometheus"
"github.com/sapcc/go-bits/logg"
"github.com/sapcc/swift-health-exporter/test/cmd/utils"
)

// ReconCollector implements the prometheus.Collector interface.
type ReconCollector struct {
// taskExitCode is the descriptor for the swift_recon_task_exit_code metric.
taskExitCode typedDesc
// tasks is the list of collectorTask values owned by this collector.
tasks []collectorTask
// isTest is the test-mode flag.
// NOTE(review): NewReconCollector forwards its isTest argument to
// newReconReplicationTask; whether this field itself is ever populated or
// read is not visible here — confirm it is needed.
isTest bool
}

// NewReconCollector creates a new ReconCollector.
func NewReconCollector(pathToExecutable string) *ReconCollector {
func NewReconCollector(pathToExecutable string, isTest bool) *ReconCollector {
return &ReconCollector{
taskExitCode: typedDesc{
desc: prometheus.NewDesc("swift_recon_task_exit_code",
Expand All @@ -48,7 +50,7 @@ func NewReconCollector(pathToExecutable string) *ReconCollector {
newReconDriveAuditTask(pathToExecutable),
newReconMD5Task(pathToExecutable),
newReconQuarantinedTask(pathToExecutable),
newReconReplicationTask(pathToExecutable),
newReconReplicationTask(pathToExecutable, isTest),
newReconUnmountedTask(pathToExecutable),
newReconUpdaterSweepTask(pathToExecutable),
},
Expand Down Expand Up @@ -364,6 +366,7 @@ func (t *reconUpdaterSweepTask) collectMetrics(ch chan<- prometheus.Metric, exit
// reconReplicationTask implements the collector.collectorTask interface.
type reconReplicationTask struct {
pathToReconExecutable string
isTest bool

accountReplicationAge typedDesc
accountReplicationDuration typedDesc
Expand All @@ -373,9 +376,10 @@ type reconReplicationTask struct {
objectReplicationDuration typedDesc
}

func newReconReplicationTask(pathToReconExecutable string) collectorTask {
func newReconReplicationTask(pathToReconExecutable string, isTest bool) collectorTask {
return &reconReplicationTask{
pathToReconExecutable: pathToReconExecutable,
isTest: isTest,
accountReplicationAge: typedDesc{
desc: prometheus.NewDesc("swift_cluster_accounts_replication_age",
"Account replication age reported by the swift-recon tool.",
Expand Down Expand Up @@ -459,8 +463,15 @@ func (t *reconReplicationTask) collectMetrics(ch chan<- prometheus.Metric, exitC
continue // to next host
}

ch <- ageTypedDesc.mustNewConstMetric(data.ReplicationLast, hostname)
ch <- durTypedDesc.mustNewConstMetric(data.ReplicationTime, hostname)
if data.ReplicationLast > 0 {
now := float64(time.Now().Unix())
if t.isTest {
now = float64(utils.TimeNow().Second())
}
tDiff := now - data.ReplicationLast
ch <- ageTypedDesc.mustNewConstMetric(tDiff, hostname)
}
ch <- durTypedDesc.mustNewConstMetric(data.ReplicationTime, hostname) // good
}
} else {
exitCode = 1
Expand Down
4 changes: 2 additions & 2 deletions collector/recon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func TestReconCollector(t *testing.T) {
}

registry := prometheus.NewPedanticRegistry()
registry.MustRegister(NewReconCollector(pathToExecutable))
registry.MustRegister(NewReconCollector(pathToExecutable, true))
assert.HTTPRequest{
Method: "GET",
Path: "/metrics",
Expand All @@ -46,7 +46,7 @@ func TestReconCollectorWithErrors(t *testing.T) {
}

registry := prometheus.NewPedanticRegistry()
registry.MustRegister(NewReconCollector(pathToExecutable))
registry.MustRegister(NewReconCollector(pathToExecutable, true))
assert.HTTPRequest{
Method: "GET",
Path: "/metrics",
Expand Down
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func main() {
swiftReconPath := getExecutablePath("SWIFT_RECON_PATH", "swift-recon")

prometheus.MustRegister(collector.NewDispersionCollector(swiftDispersionReportPath))
prometheus.MustRegister(collector.NewReconCollector(swiftReconPath))
prometheus.MustRegister(collector.NewReconCollector(swiftReconPath, false))

// this port has been allocated for Swift health exporter
// See: https://github.com/prometheus/prometheus/wiki/Default-port-allocations
Expand Down
10 changes: 10 additions & 0 deletions test/cmd/utils/time.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package utils

import "time"

// clockSeconds is the fixed Unix timestamp, in seconds, reported by TimeNow.
var clockSeconds int64 = 1

// TimeNow is a deterministic stand-in for time.Now in unit tests. It returns
// the instant clockSeconds seconds after the Unix epoch, expressed in UTC.
func TimeNow() time.Time {
	fixed := time.Unix(clockSeconds, 0)
	return fixed.UTC()
}

0 comments on commit 1531679

Please sign in to comment.