Skip to content

Commit

Permalink
Make metrics better follow guidelines (#787)
Browse files Browse the repository at this point in the history
* Improve stat linux metric names.

cpu is no longer used.

* node_cpu -> node_cpu_seconds_total for Linux

* Improve filesystem metric names with units

* Improve units and names of linux disk stats

Remove sector metrics, the bytes metrics cover those already.

* Infiniband counters should end in _total

* Improve timex metric names, convert to more normal units.

See
https://github.com/torvalds/linux/blob/3c073991eb417b6f785ddc6afbbdc369eb84aa6a/kernel/time/ntp.c#L909
for what stabil means, looks like a moving average of some form.

* Update test fixture

* For meminfo metrics that had "kB" units, add _bytes

* Interrupts counter should have _total
  • Loading branch information
brian-brazil authored and SuperQ committed Jan 17, 2018
1 parent b4d7ba1 commit a98067a
Show file tree
Hide file tree
Showing 12 changed files with 547 additions and 604 deletions.
2 changes: 1 addition & 1 deletion collector/cpu_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func init() {
func NewCPUCollector() (Collector, error) {
return &cpuCollector{
cpu: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", cpuCollectorSubsystem),
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "seconds_total"),
"Seconds the cpus spent in each mode.",
[]string{"cpu", "mode"}, nil,
),
Expand Down
93 changes: 38 additions & 55 deletions collector/diskstats_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,30 @@ import (
)

const (
diskSubsystem = "disk"
diskSectorSize uint64 = 512
diskSubsystem = "disk"
diskSectorSize = 512
)

var (
ignoredDevices = kingpin.Flag("collector.diskstats.ignored-devices", "Regexp of devices to ignore for diskstats.").Default("^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$").String()
)

type typedFactorDesc struct {
desc *prometheus.Desc
valueType prometheus.ValueType
factor float64
}

func (d *typedFactorDesc) mustNewConstMetric(value float64, labels ...string) prometheus.Metric {
if d.factor != 0 {
value *= d.factor
}
return prometheus.MustNewConstMetric(d.desc, d.valueType, value, labels...)
}

type diskstatsCollector struct {
ignoredDevicesPattern *regexp.Regexp
descs []typedDesc
descs []typedFactorDesc
}

func init() {
Expand All @@ -54,70 +67,74 @@ func NewDiskstatsCollector() (Collector, error) {
return &diskstatsCollector{
ignoredDevicesPattern: regexp.MustCompile(*ignoredDevices),
// Docs from https://www.kernel.org/doc/Documentation/iostats.txt
descs: []typedDesc{
descs: []typedFactorDesc{
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "reads_completed"),
prometheus.BuildFQName(namespace, diskSubsystem, "reads_completed_total"),
"The total number of reads completed successfully.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
},
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "reads_merged"),
prometheus.BuildFQName(namespace, diskSubsystem, "reads_merged_total"),
"The total number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
},
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "sectors_read"),
"The total number of sectors read successfully.",
prometheus.BuildFQName(namespace, diskSubsystem, "read_bytes_total"),
"The total number of bytes read successfully.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
factor: diskSectorSize,
},
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "read_time_ms"),
prometheus.BuildFQName(namespace, diskSubsystem, "read_time_seconds_total"),
"The total number of milliseconds spent by all reads.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
factor: .001,
},
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "writes_completed"),
prometheus.BuildFQName(namespace, diskSubsystem, "writes_completed_total"),
"The total number of writes completed successfully.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
},
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "writes_merged"),
prometheus.BuildFQName(namespace, diskSubsystem, "writes_merged_total"),
"The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
},
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "sectors_written"),
"The total number of sectors written successfully.",
prometheus.BuildFQName(namespace, diskSubsystem, "written_bytes_total"),
"The total number of bytes written successfully.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
factor: diskSectorSize,
},
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "write_time_ms"),
"This is the total number of milliseconds spent by all writes.",
prometheus.BuildFQName(namespace, diskSubsystem, "write_time_seconds_total"),
"This is the total number of seconds spent by all writes.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
factor: .001,
},
{
desc: prometheus.NewDesc(
Expand All @@ -129,35 +146,21 @@ func NewDiskstatsCollector() (Collector, error) {
},
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "io_time_ms"),
"Total Milliseconds spent doing I/Os.",
prometheus.BuildFQName(namespace, diskSubsystem, "io_time_seconds_total"),
"Total seconds spent doing I/Os.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
factor: .001,
},
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "io_time_weighted"),
"The weighted # of milliseconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
},
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "bytes_read"),
"The total number of bytes read successfully.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
},
{
desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, diskSubsystem, "bytes_written"),
"The total number of bytes written successfully.",
prometheus.BuildFQName(namespace, diskSubsystem, "io_time_weighted_seconds_total"),
"The weighted # of seconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt.",
diskLabelNames,
nil,
), valueType: prometheus.CounterValue,
factor: .001,
},
},
}, nil
Expand Down Expand Up @@ -201,15 +204,6 @@ func getDiskStats() (map[string]map[int]string, error) {
return parseDiskStats(file)
}

func convertDiskSectorsToBytes(sectorCount string) (string, error) {
sectors, err := strconv.ParseUint(sectorCount, 10, 64)
if err != nil {
return "", err
}

return strconv.FormatUint(sectors*diskSectorSize, 10), nil
}

func parseDiskStats(r io.Reader) (map[string]map[int]string, error) {
var (
diskStats = map[string]map[int]string{}
Expand All @@ -226,17 +220,6 @@ func parseDiskStats(r io.Reader) (map[string]map[int]string, error) {
for i, v := range parts[3:] {
diskStats[dev][i] = v
}
bytesRead, err := convertDiskSectorsToBytes(diskStats[dev][2])
if err != nil {
return nil, fmt.Errorf("invalid value for sectors read in %s: %s", procFilePath("diskstats"), scanner.Text())
}
diskStats[dev][11] = bytesRead

bytesWritten, err := convertDiskSectorsToBytes(diskStats[dev][6])
if err != nil {
return nil, fmt.Errorf("invalid value for sectors written in %s: %s", procFilePath("diskstats"), scanner.Text())
}
diskStats[dev][12] = bytesWritten
}

return diskStats, scanner.Err()
Expand Down
8 changes: 0 additions & 8 deletions collector/diskstats_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,4 @@ func TestDiskStats(t *testing.T) {
if want, got := "68", diskStats["mmcblk0p2"][10]; want != got {
t.Errorf("want diskstats mmcblk0p2 %s, got %s", want, got)
}

if want, got := "513713216512", diskStats["sda"][11]; want != got {
t.Errorf("want diskstats sda read bytes %s, got %s", want, got)
}

if want, got := "258916880384", diskStats["sda"][12]; want != got {
t.Errorf("want diskstats sda write bytes %s, got %s", want, got)
}
}
6 changes: 3 additions & 3 deletions collector/filesystem_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,19 +72,19 @@ func NewFilesystemCollector() (Collector, error) {
filesystemsTypesPattern := regexp.MustCompile(*ignoredFSTypes)

sizeDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "size"),
prometheus.BuildFQName(namespace, subsystem, "size_bytes"),
"Filesystem size in bytes.",
filesystemLabelNames, nil,
)

freeDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "free"),
prometheus.BuildFQName(namespace, subsystem, "free_bytes"),
"Filesystem free space in bytes.",
filesystemLabelNames, nil,
)

availDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "avail"),
prometheus.BuildFQName(namespace, subsystem, "avail_bytes"),
"Filesystem space available to non-root users in bytes.",
filesystemLabelNames, nil,
)
Expand Down
Loading

0 comments on commit a98067a

Please sign in to comment.