From 1f2cb248dd3aab350189d1a70b742831b0fe3c3f Mon Sep 17 00:00:00 2001 From: Nurlan Moldomurov Date: Fri, 13 Jun 2025 13:44:56 +0300 Subject: [PATCH 01/14] PMM-7 Add PostgreSQL 18 support with new metrics and collectors Introduce support for PostgreSQL 18, adding enhanced metrics for checkpointer, vacuum/analyze timings, and parallel worker activity. Implement new collectors for `pg_stat_io` and `pg_backend_memory_contexts` with version-specific queries, along with comprehensive tests. Update README and CI to reflect PostgreSQL 18 compatibility. --- CHANGELOG.md | 11 + README.md | 2 +- collector/pg_backend_memory_contexts.go | 216 +++++++++++++++++ collector/pg_backend_stats.go | 310 ++++++++++++++++++++++++ collector/pg_stat_bgwriter.go | 69 +++++- collector/pg_stat_database.go | 82 ++++++- collector/pg_stat_database_test.go | 8 +- collector/pg_stat_io.go | 256 +++++++++++++++++++ collector/pg_stat_io_test.go | 138 +++++++++++ collector/pg_stat_user_tables.go | 114 ++++++++- collector/pg_stat_user_tables_test.go | 4 +- queries-lr.yaml | 26 +- queries-mr.yaml | 28 +++ 13 files changed, 1241 insertions(+), 23 deletions(-) create mode 100644 collector/pg_backend_memory_contexts.go create mode 100644 collector/pg_backend_stats.go create mode 100644 collector/pg_stat_io.go create mode 100644 collector/pg_stat_io_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index dc8f898f6..e2e4aa846 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +## [Unreleased] + +* [ENHANCEMENT] Add PostgreSQL 18 support: + * Add parallel worker activity metrics (`pg_stat_database_parallel_workers_to_launch`, `pg_stat_database_parallel_workers_launched`) + * Add vacuum/analyze timing metrics (`pg_stat_user_tables_total_vacuum_time`, `pg_stat_user_tables_total_autovacuum_time`, `pg_stat_user_tables_total_analyze_time`, `pg_stat_user_tables_total_autoanalyze_time`) + * Add enhanced checkpointer metrics (`pg_stat_bgwriter_checkpoints_done_total`, `pg_stat_bgwriter_slru_written_total`) 
+ * Add `pg_stat_io` collector with byte statistics and WAL I/O activity tracking + * Add `pg_backend_stats` collector for per-backend I/O and WAL statistics + * Add enhanced `pg_backend_memory_contexts` collector with type and path columns +* [ENHANCEMENT] Update CI tested PostgreSQL versions to include PostgreSQL 18 + ## 0.15.0 / 2023-10-27 * [ENHANCEMENT] Add 1kB and 2kB units #915 diff --git a/README.md b/README.md index 429058e6d..b22e5f798 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Prometheus exporter for PostgreSQL server metrics. -CI Tested PostgreSQL versions: `11`, `12`, `13`, `14`, `15`, `16` +CI Tested PostgreSQL versions: `11`, `12`, `13`, `14`, `15`, `16`, `18` ## Quick Start This package is available for Docker: diff --git a/collector/pg_backend_memory_contexts.go b/collector/pg_backend_memory_contexts.go new file mode 100644 index 000000000..2146d69ba --- /dev/null +++ b/collector/pg_backend_memory_contexts.go @@ -0,0 +1,216 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package collector + +import ( + "context" + "database/sql" + + "github.com/blang/semver/v4" + "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" +) + +const backendMemoryContextsSubsystem = "backend_memory_contexts" + +func init() { + registerCollector(backendMemoryContextsSubsystem, defaultDisabled, NewPGBackendMemoryContextsCollector) +} + +type PGBackendMemoryContextsCollector struct { + log log.Logger +} + +func NewPGBackendMemoryContextsCollector(config collectorConfig) (Collector, error) { + return &PGBackendMemoryContextsCollector{log: config.logger}, nil +} + +var ( + backendMemoryContextsTotalBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendMemoryContextsSubsystem, "total_bytes"), + "Total bytes allocated for memory context", + []string{"pid", "name", "ident", "parent", "level", "type", "path"}, + prometheus.Labels{}, + ) + backendMemoryContextsUsedBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendMemoryContextsSubsystem, "used_bytes"), + "Used bytes in memory context", + []string{"pid", "name", "ident", "parent", "level", "type", "path"}, + prometheus.Labels{}, + ) + backendMemoryContextsFreeBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendMemoryContextsSubsystem, "free_bytes"), + "Free bytes in memory context", + []string{"pid", "name", "ident", "parent", "level", "type", "path"}, + prometheus.Labels{}, + ) + backendMemoryContextsFreeChunks = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendMemoryContextsSubsystem, "free_chunks"), + "Number of free chunks in memory context", + []string{"pid", "name", "ident", "parent", "level", "type", "path"}, + prometheus.Labels{}, + ) + + // PostgreSQL 18+ query with type and path columns + backendMemoryContextsQuery18Plus = ` + SELECT + pid, + name, + COALESCE(ident, '') as ident, + COALESCE(parent, '') as parent, + level, + total_bytes, + total_nblocks, + free_bytes, + free_chunks, + used_bytes, + type, + path + 
FROM pg_backend_memory_contexts + ORDER BY pid, name + ` + + // Pre-PostgreSQL 18 query without type and path columns + backendMemoryContextsQueryPre18 = ` + SELECT + pid, + name, + COALESCE(ident, '') as ident, + COALESCE(parent, '') as parent, + level, + total_bytes, + total_nblocks, + free_bytes, + free_chunks, + used_bytes, + '' as type, + '' as path + FROM pg_backend_memory_contexts + ORDER BY pid, name + ` +) + +func (c *PGBackendMemoryContextsCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + // pg_backend_memory_contexts was introduced in PostgreSQL 14 + if instance.version.LT(semver.Version{Major: 14}) { + return nil + } + + db := instance.getDB() + + // Use version-specific query for PostgreSQL 18+ + query := backendMemoryContextsQueryPre18 + if instance.version.GTE(semver.Version{Major: 18}) { + query = backendMemoryContextsQuery18Plus + } + + rows, err := db.QueryContext(ctx, query) + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var pid, name, ident, parent, contextType, path sql.NullString + var level, totalNblocks, freeChunks sql.NullInt64 + var totalBytes, freeBytes, usedBytes sql.NullFloat64 + + err := rows.Scan( + &pid, + &name, + &ident, + &parent, + &level, + &totalBytes, + &totalNblocks, + &freeBytes, + &freeChunks, + &usedBytes, + &contextType, + &path, + ) + if err != nil { + return err + } + + pidLabel := "unknown" + if pid.Valid { + pidLabel = pid.String + } + nameLabel := "unknown" + if name.Valid { + nameLabel = name.String + } + identLabel := "" + if ident.Valid { + identLabel = ident.String + } + parentLabel := "" + if parent.Valid { + parentLabel = parent.String + } + levelLabel := "0" + if level.Valid { + levelLabel = string(rune(level.Int64 + '0')) + } + typeLabel := "" + if contextType.Valid { + typeLabel = contextType.String + } + pathLabel := "" + if path.Valid { + pathLabel = path.String + } + + labels := []string{pidLabel, nameLabel, identLabel, 
parentLabel, levelLabel, typeLabel, pathLabel} + + if totalBytes.Valid { + ch <- prometheus.MustNewConstMetric( + backendMemoryContextsTotalBytes, + prometheus.GaugeValue, + totalBytes.Float64, + labels..., + ) + } + + if usedBytes.Valid { + ch <- prometheus.MustNewConstMetric( + backendMemoryContextsUsedBytes, + prometheus.GaugeValue, + usedBytes.Float64, + labels..., + ) + } + + if freeBytes.Valid { + ch <- prometheus.MustNewConstMetric( + backendMemoryContextsFreeBytes, + prometheus.GaugeValue, + freeBytes.Float64, + labels..., + ) + } + + if freeChunks.Valid { + ch <- prometheus.MustNewConstMetric( + backendMemoryContextsFreeChunks, + prometheus.GaugeValue, + float64(freeChunks.Int64), + labels..., + ) + } + } + + return nil +} diff --git a/collector/pg_backend_stats.go b/collector/pg_backend_stats.go new file mode 100644 index 000000000..e28aa70ca --- /dev/null +++ b/collector/pg_backend_stats.go @@ -0,0 +1,310 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package collector + +import ( + "context" + "database/sql" + + "github.com/blang/semver/v4" + "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" +) + +const backendStatsSubsystem = "backend_stats" + +func init() { + registerCollector(backendStatsSubsystem, defaultDisabled, NewPGBackendStatsCollector) +} + +type PGBackendStatsCollector struct { + log log.Logger +} + +func NewPGBackendStatsCollector(config collectorConfig) (Collector, error) { + return &PGBackendStatsCollector{log: config.logger}, nil +} + +var ( + // Backend I/O statistics metrics (PostgreSQL 18+) + backendIOReads = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendStatsSubsystem, "io_reads_total"), + "Number of reads by backend (PostgreSQL 18+)", + []string{"pid", "backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + backendIOWrites = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendStatsSubsystem, "io_writes_total"), + "Number of writes by backend (PostgreSQL 18+)", + []string{"pid", "backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + backendIOExtends = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendStatsSubsystem, "io_extends_total"), + "Number of extends by backend (PostgreSQL 18+)", + []string{"pid", "backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + backendIOReadBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendStatsSubsystem, "io_read_bytes_total"), + "Number of bytes read by backend (PostgreSQL 18+)", + []string{"pid", "backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + backendIOWriteBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendStatsSubsystem, "io_write_bytes_total"), + "Number of bytes written by backend (PostgreSQL 18+)", + []string{"pid", "backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + backendIOExtendBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, 
backendStatsSubsystem, "io_extend_bytes_total"), + "Number of bytes extended by backend (PostgreSQL 18+)", + []string{"pid", "backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + + // Backend WAL statistics metrics (PostgreSQL 18+) + backendWALRecords = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendStatsSubsystem, "wal_records_total"), + "Number of WAL records generated by backend (PostgreSQL 18+)", + []string{"pid"}, + prometheus.Labels{}, + ) + backendWALBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendStatsSubsystem, "wal_bytes_total"), + "Number of WAL bytes generated by backend (PostgreSQL 18+)", + []string{"pid"}, + prometheus.Labels{}, + ) + backendWALBuffersUsed = prometheus.NewDesc( + prometheus.BuildFQName(namespace, backendStatsSubsystem, "wal_buffers_used_total"), + "Number of WAL buffers used by backend (PostgreSQL 18+)", + []string{"pid"}, + prometheus.Labels{}, + ) + + // Backend I/O query for PostgreSQL 18+ + backendIOQuery = ` + SELECT + pid, + backend_type, + io_context, + io_object, + reads, + writes, + extends, + read_bytes, + write_bytes, + extend_bytes + FROM pg_stat_get_backend_io(NULL) + WHERE pid IS NOT NULL + ` + + // Backend WAL query for PostgreSQL 18+ + backendWALQuery = ` + SELECT + pid, + wal_records, + wal_bytes, + wal_buffers_used + FROM pg_stat_get_backend_wal(NULL) + WHERE pid IS NOT NULL + ` +) + +func (c *PGBackendStatsCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + // Backend statistics functions were introduced in PostgreSQL 18 + if instance.version.LT(semver.Version{Major: 18}) { + return nil + } + + db := instance.getDB() + + // Collect backend I/O statistics + if err := c.collectBackendIO(ctx, db, ch); err != nil { + return err + } + + // Collect backend WAL statistics + if err := c.collectBackendWAL(ctx, db, ch); err != nil { + return err + } + + return nil +} + +func (c *PGBackendStatsCollector) collectBackendIO(ctx 
context.Context, db *sql.DB, ch chan<- prometheus.Metric) error { + rows, err := db.QueryContext(ctx, backendIOQuery) + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var pid sql.NullString + var backendType, ioContext, ioObject sql.NullString + var reads, writes, extends, readBytes, writeBytes, extendBytes sql.NullFloat64 + + err := rows.Scan( + &pid, + &backendType, + &ioContext, + &ioObject, + &reads, + &writes, + &extends, + &readBytes, + &writeBytes, + &extendBytes, + ) + if err != nil { + return err + } + + pidLabel := "unknown" + if pid.Valid { + pidLabel = pid.String + } + backendTypeLabel := "unknown" + if backendType.Valid { + backendTypeLabel = backendType.String + } + ioContextLabel := "unknown" + if ioContext.Valid { + ioContextLabel = ioContext.String + } + ioObjectLabel := "unknown" + if ioObject.Valid { + ioObjectLabel = ioObject.String + } + + labels := []string{pidLabel, backendTypeLabel, ioContextLabel, ioObjectLabel} + + if reads.Valid { + ch <- prometheus.MustNewConstMetric( + backendIOReads, + prometheus.CounterValue, + reads.Float64, + labels..., + ) + } + + if writes.Valid { + ch <- prometheus.MustNewConstMetric( + backendIOWrites, + prometheus.CounterValue, + writes.Float64, + labels..., + ) + } + + if extends.Valid { + ch <- prometheus.MustNewConstMetric( + backendIOExtends, + prometheus.CounterValue, + extends.Float64, + labels..., + ) + } + + if readBytes.Valid { + ch <- prometheus.MustNewConstMetric( + backendIOReadBytes, + prometheus.CounterValue, + readBytes.Float64, + labels..., + ) + } + + if writeBytes.Valid { + ch <- prometheus.MustNewConstMetric( + backendIOWriteBytes, + prometheus.CounterValue, + writeBytes.Float64, + labels..., + ) + } + + if extendBytes.Valid { + ch <- prometheus.MustNewConstMetric( + backendIOExtendBytes, + prometheus.CounterValue, + extendBytes.Float64, + labels..., + ) + } + } + + return nil +} + +func (c *PGBackendStatsCollector) collectBackendWAL(ctx context.Context, db 
*sql.DB, ch chan<- prometheus.Metric) error { + rows, err := db.QueryContext(ctx, backendWALQuery) + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var pid sql.NullString + var walRecords, walBytes, walBuffersUsed sql.NullFloat64 + + err := rows.Scan( + &pid, + &walRecords, + &walBytes, + &walBuffersUsed, + ) + if err != nil { + return err + } + + pidLabel := "unknown" + if pid.Valid { + pidLabel = pid.String + } + + labels := []string{pidLabel} + + if walRecords.Valid { + ch <- prometheus.MustNewConstMetric( + backendWALRecords, + prometheus.CounterValue, + walRecords.Float64, + labels..., + ) + } + + if walBytes.Valid { + ch <- prometheus.MustNewConstMetric( + backendWALBytes, + prometheus.CounterValue, + walBytes.Float64, + labels..., + ) + } + + if walBuffersUsed.Valid { + ch <- prometheus.MustNewConstMetric( + backendWALBuffersUsed, + prometheus.CounterValue, + walBuffersUsed.Float64, + labels..., + ) + } + } + + return nil +} diff --git a/collector/pg_stat_bgwriter.go b/collector/pg_stat_bgwriter.go index 0b73b4f44..3606aa8c1 100644 --- a/collector/pg_stat_bgwriter.go +++ b/collector/pg_stat_bgwriter.go @@ -16,6 +16,7 @@ package collector import ( "context" "database/sql" + "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" ) @@ -100,6 +101,18 @@ var ( []string{"collector", "server"}, prometheus.Labels{}, ) + statCheckpointerNumDoneDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoints_done_total"), + "Number of completed checkpoints (PostgreSQL 18+)", + []string{"collector", "server"}, + prometheus.Labels{}, + ) + statCheckpointerSlruWrittenDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, bgWriterSubsystem, "slru_written_total"), + "Number of SLRU buffers written during checkpoints (PostgreSQL 18+)", + []string{"collector", "server"}, + prometheus.Labels{}, + ) ) var statBGWriter = map[string]*prometheus.Desc{ "percona_checkpoints_timed": 
prometheus.NewDesc( @@ -191,7 +204,7 @@ const statBGWriterQueryPost17 = `SELECT ,stats_reset FROM pg_stat_bgwriter;` -const statCheckpointerQuery = `SELECT +const statCheckpointerQueryPre18 = `SELECT num_timed ,num_requested ,restartpoints_timed @@ -201,12 +214,28 @@ const statCheckpointerQuery = `SELECT ,sync_time ,buffers_written ,stats_reset + ,NULL::bigint as num_done + ,NULL::bigint as slru_written + FROM pg_stat_checkpointer;` + +const statCheckpointerQuery18Plus = `SELECT + num_timed + ,num_requested + ,restartpoints_timed + ,restartpoints_req + ,restartpoints_done + ,write_time + ,sync_time + ,buffers_written + ,stats_reset + ,num_done + ,slru_written FROM pg_stat_checkpointer;` func (p PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { db := instance.getDB() - var cpt, cpr, bcp, bc, mwc, bb, bbf, ba sql.NullInt64 + var cpt, cpr, bcp, bc, mwc, bb, bbf, ba, numDone, slruWritten sql.NullInt64 var cpwt, cpst sql.NullFloat64 var sr sql.NullTime @@ -219,10 +248,15 @@ func (p PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, } var rpt, rpr, rpd sql.NullInt64 var csr sql.NullTime - // these variables are not used, but I left them here for reference - row = db.QueryRowContext(ctx, - statCheckpointerQuery) - err = row.Scan(&cpt, &cpr, &rpt, &rpr, &rpd, &cpwt, &cpst, &bcp, &csr) + + // Use version-specific checkpointer query for PostgreSQL 18+ + checkpointerQuery := statCheckpointerQueryPre18 + if instance.version.GTE(semver.Version{Major: 18}) { + checkpointerQuery = statCheckpointerQuery18Plus + } + + row = db.QueryRowContext(ctx, checkpointerQuery) + err = row.Scan(&cpt, &cpr, &rpt, &rpr, &rpd, &cpwt, &cpst, &bcp, &csr, &numDone, &slruWritten) if err != nil { return err } @@ -357,6 +391,29 @@ func (p PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, instance.name, ) + // PostgreSQL 18+ checkpointer metrics + if numDone.Valid { + numDoneMetric := 
float64(numDone.Int64) + ch <- prometheus.MustNewConstMetric( + statCheckpointerNumDoneDesc, + prometheus.CounterValue, + numDoneMetric, + "exporter", + instance.name, + ) + } + + if slruWritten.Valid { + slruWrittenMetric := float64(slruWritten.Int64) + ch <- prometheus.MustNewConstMetric( + statCheckpointerSlruWrittenDesc, + prometheus.CounterValue, + slruWrittenMetric, + "exporter", + instance.name, + ) + } + // TODO: analyze metrics below, why do we duplicate them? ch <- prometheus.MustNewConstMetric( diff --git a/collector/pg_stat_database.go b/collector/pg_stat_database.go index 328afee2c..f8a3aeef5 100644 --- a/collector/pg_stat_database.go +++ b/collector/pg_stat_database.go @@ -17,6 +17,7 @@ import ( "context" "database/sql" + "github.com/blang/semver/v4" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" @@ -206,8 +207,26 @@ var ( []string{"datid", "datname"}, prometheus.Labels{}, ) + statDatabaseParallelWorkersToLaunch = prometheus.NewDesc(prometheus.BuildFQName( + namespace, + statDatabaseSubsystem, + "parallel_workers_to_launch", + ), + "Number of parallel workers to launch (PostgreSQL 18+)", + []string{"datid", "datname"}, + prometheus.Labels{}, + ) + statDatabaseParallelWorkersLaunched = prometheus.NewDesc(prometheus.BuildFQName( + namespace, + statDatabaseSubsystem, + "parallel_workers_launched", + ), + "Number of parallel workers launched (PostgreSQL 18+)", + []string{"datid", "datname"}, + prometheus.Labels{}, + ) - statDatabaseQuery = ` + statDatabaseQueryPre18 = ` SELECT datid ,datname @@ -228,15 +247,48 @@ var ( ,blk_read_time ,blk_write_time ,stats_reset + ,NULL::bigint as parallel_workers_to_launch + ,NULL::bigint as parallel_workers_launched + FROM pg_stat_database; + ` + + statDatabaseQuery18Plus = ` + SELECT + datid + ,datname + ,numbackends + ,xact_commit + ,xact_rollback + ,blks_read + ,blks_hit + ,tup_returned + ,tup_fetched + ,tup_inserted + ,tup_updated + ,tup_deleted + ,conflicts + 
,temp_files + ,temp_bytes + ,deadlocks + ,blk_read_time + ,blk_write_time + ,stats_reset + ,parallel_workers_to_launch + ,parallel_workers_launched FROM pg_stat_database; ` ) func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { db := instance.getDB() - rows, err := db.QueryContext(ctx, - statDatabaseQuery, - ) + + // Use version-specific query for PostgreSQL 18+ + query := statDatabaseQueryPre18 + if instance.version.GTE(semver.Version{Major: 18}) { + query = statDatabaseQuery18Plus + } + + rows, err := db.QueryContext(ctx, query) if err != nil { return err } @@ -246,6 +298,7 @@ func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance var datid, datname sql.NullString var numBackends, xactCommit, xactRollback, blksRead, blksHit, tupReturned, tupFetched, tupInserted, tupUpdated, tupDeleted, conflicts, tempFiles, tempBytes, deadlocks, blkReadTime, blkWriteTime sql.NullFloat64 var statsReset sql.NullTime + var parallelWorkersToLaunch, parallelWorkersLaunched sql.NullFloat64 err := rows.Scan( &datid, @@ -267,6 +320,8 @@ func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance &blkReadTime, &blkWriteTime, &statsReset, + ¶llelWorkersToLaunch, + ¶llelWorkersLaunched, ) if err != nil { return err @@ -473,6 +528,25 @@ func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance statsResetMetric, labels..., ) + + // PostgreSQL 18+ parallel worker metrics + if parallelWorkersToLaunch.Valid { + ch <- prometheus.MustNewConstMetric( + statDatabaseParallelWorkersToLaunch, + prometheus.CounterValue, + parallelWorkersToLaunch.Float64, + labels..., + ) + } + + if parallelWorkersLaunched.Valid { + ch <- prometheus.MustNewConstMetric( + statDatabaseParallelWorkersLaunched, + prometheus.CounterValue, + parallelWorkersLaunched.Float64, + labels..., + ) + } } return nil } diff --git a/collector/pg_stat_database_test.go b/collector/pg_stat_database_test.go 
index fe1b17066..b9d4f1b83 100644 --- a/collector/pg_stat_database_test.go +++ b/collector/pg_stat_database_test.go @@ -82,7 +82,7 @@ func TestPGStatDatabaseCollector(t *testing.T) { 823, srT) - mock.ExpectQuery(sanitizeQuery(statDatabaseQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -203,7 +203,7 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { 16, 823, srT) - mock.ExpectQuery(sanitizeQuery(statDatabaseQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -345,7 +345,7 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { 17, 824, srT) - mock.ExpectQuery(sanitizeQuery(statDatabaseQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -460,7 +460,7 @@ func TestPGStatDatabaseCollectorTestNilStatReset(t *testing.T) { 823, nil) - mock.ExpectQuery(sanitizeQuery(statDatabaseQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { diff --git a/collector/pg_stat_io.go b/collector/pg_stat_io.go new file mode 100644 index 000000000..6b2f1dec5 --- /dev/null +++ b/collector/pg_stat_io.go @@ -0,0 +1,256 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "database/sql" + + "github.com/blang/semver/v4" + "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" +) + +const statIOSubsystem = "stat_io" + +func init() { + registerCollector(statIOSubsystem, defaultDisabled, NewPGStatIOCollector) +} + +type PGStatIOCollector struct { + log log.Logger +} + +func NewPGStatIOCollector(config collectorConfig) (Collector, error) { + return &PGStatIOCollector{log: config.logger}, nil +} + +var ( + statIOReads = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "reads_total"), + "Number of reads", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOReadTime = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "read_time_seconds_total"), + "Time spent reading", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOWrites = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "writes_total"), + "Number of writes", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOWriteTime = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "write_time_seconds_total"), + "Time spent writing", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOExtends = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "extends_total"), + "Number of extends", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOReadBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "read_bytes_total"), + "Number of bytes read (PostgreSQL 18+)", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOWriteBytes = prometheus.NewDesc( + 
prometheus.BuildFQName(namespace, statIOSubsystem, "write_bytes_total"), + "Number of bytes written (PostgreSQL 18+)", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOExtendBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "extend_bytes_total"), + "Number of bytes extended (PostgreSQL 18+)", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + + // PostgreSQL 18+ query with byte statistics and WAL I/O + statIOQuery18Plus = ` + SELECT + backend_type, + io_context, + io_object, + reads, + read_time, + writes, + write_time, + extends, + read_bytes, + write_bytes, + extend_bytes + FROM pg_stat_io + ` + + // Pre-PostgreSQL 18 query without byte statistics + statIOQueryPre18 = ` + SELECT + backend_type, + io_context, + io_object, + reads, + read_time, + writes, + write_time, + extends, + NULL::bigint as read_bytes, + NULL::bigint as write_bytes, + NULL::bigint as extend_bytes + FROM pg_stat_io + ` +) + +func (c *PGStatIOCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + // pg_stat_io was introduced in PostgreSQL 16 + if instance.version.LT(semver.Version{Major: 16}) { + return nil + } + + db := instance.getDB() + + // Use version-specific query for PostgreSQL 18+ + query := statIOQueryPre18 + if instance.version.GTE(semver.Version{Major: 18}) { + query = statIOQuery18Plus + } + + rows, err := db.QueryContext(ctx, query) + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var backendType, ioContext, ioObject sql.NullString + var reads, writes, extends, readBytes, writeBytes, extendBytes sql.NullFloat64 + var readTime, writeTime sql.NullFloat64 + + err := rows.Scan( + &backendType, + &ioContext, + &ioObject, + &reads, + &readTime, + &writes, + &writeTime, + &extends, + &readBytes, + &writeBytes, + &extendBytes, + ) + if err != nil { + return err + } + + backendTypeLabel := "unknown" + if 
backendType.Valid { + backendTypeLabel = backendType.String + } + ioContextLabel := "unknown" + if ioContext.Valid { + ioContextLabel = ioContext.String + } + ioObjectLabel := "unknown" + if ioObject.Valid { + ioObjectLabel = ioObject.String + } + + labels := []string{backendTypeLabel, ioContextLabel, ioObjectLabel} + + if reads.Valid { + ch <- prometheus.MustNewConstMetric( + statIOReads, + prometheus.CounterValue, + reads.Float64, + labels..., + ) + } + + if readTime.Valid { + ch <- prometheus.MustNewConstMetric( + statIOReadTime, + prometheus.CounterValue, + readTime.Float64/1000.0, // Convert milliseconds to seconds + labels..., + ) + } + + if writes.Valid { + ch <- prometheus.MustNewConstMetric( + statIOWrites, + prometheus.CounterValue, + writes.Float64, + labels..., + ) + } + + if writeTime.Valid { + ch <- prometheus.MustNewConstMetric( + statIOWriteTime, + prometheus.CounterValue, + writeTime.Float64/1000.0, // Convert milliseconds to seconds + labels..., + ) + } + + if extends.Valid { + ch <- prometheus.MustNewConstMetric( + statIOExtends, + prometheus.CounterValue, + extends.Float64, + labels..., + ) + } + + // PostgreSQL 18+ byte statistics + if readBytes.Valid { + ch <- prometheus.MustNewConstMetric( + statIOReadBytes, + prometheus.CounterValue, + readBytes.Float64, + labels..., + ) + } + + if writeBytes.Valid { + ch <- prometheus.MustNewConstMetric( + statIOWriteBytes, + prometheus.CounterValue, + writeBytes.Float64, + labels..., + ) + } + + if extendBytes.Valid { + ch <- prometheus.MustNewConstMetric( + statIOExtendBytes, + prometheus.CounterValue, + extendBytes.Float64, + labels..., + ) + } + } + + return nil +} diff --git a/collector/pg_stat_io_test.go b/collector/pg_stat_io_test.go new file mode 100644 index 000000000..47233fc91 --- /dev/null +++ b/collector/pg_stat_io_test.go @@ -0,0 +1,138 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" +) + +func TestPGStatIOCollector(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub database connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("16.0.0")} + + columns := []string{"backend_type", "io_context", "io_object", "reads", "read_time", "writes", "write_time", "extends", "read_bytes", "write_bytes", "extend_bytes"} + rows := sqlmock.NewRows(columns). 
+ AddRow("client backend", "normal", "relation", 100, 50.5, 75, 25.2, 10, nil, nil, nil) + mock.ExpectQuery("SELECT.*FROM pg_stat_io").WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatIOCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatIOCollector.Update: %s", err) + } + }() + + expected := 5 // reads, read_time, writes, write_time, extends (no byte metrics for v16) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("There were unfulfilled expectations: %s", err) + } + + metricCount := 0 + for m := range ch { + metricCount++ + _ = m + } + + if metricCount != expected { + t.Errorf("Expected %d metrics, got %d", expected, metricCount) + } +} + +func TestPGStatIOCollectorPostgreSQL18(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub database connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("18.0.0")} + + columns := []string{"backend_type", "io_context", "io_object", "reads", "read_time", "writes", "write_time", "extends", "read_bytes", "write_bytes", "extend_bytes"} + rows := sqlmock.NewRows(columns). 
+ AddRow("client backend", "normal", "relation", 100, 50.5, 75, 25.2, 10, 1024, 2048, 512) + mock.ExpectQuery("SELECT.*FROM pg_stat_io").WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatIOCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatIOCollector.Update: %s", err) + } + }() + + expected := 8 // reads, read_time, writes, write_time, extends, read_bytes, write_bytes, extend_bytes + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("There were unfulfilled expectations: %s", err) + } + + metricCount := 0 + for m := range ch { + metricCount++ + _ = m + } + + if metricCount != expected { + t.Errorf("Expected %d metrics, got %d", expected, metricCount) + } +} + +func TestPGStatIOCollectorPrePostgreSQL16(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub database connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("15.0.0")} + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatIOCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatIOCollector.Update: %s", err) + } + }() + + // Should not make any queries for PostgreSQL < 16 + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("There were unfulfilled expectations: %s", err) + } + + metricCount := 0 + for m := range ch { + metricCount++ + _ = m + } + + if metricCount != 0 { + t.Errorf("Expected 0 metrics for PostgreSQL < 16, got %d", metricCount) + } +} diff --git a/collector/pg_stat_user_tables.go b/collector/pg_stat_user_tables.go index af3822ca8..9e75ec9b9 100644 --- a/collector/pg_stat_user_tables.go +++ b/collector/pg_stat_user_tables.go @@ -17,6 +17,7 @@ import ( "context" "database/sql" + "github.com/blang/semver/v4" "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" ) 
@@ -156,8 +157,63 @@ var ( []string{"datname", "schemaname", "relname"}, prometheus.Labels{}, ) + statUserTablesTotalVacuumTime = prometheus.NewDesc( + prometheus.BuildFQName(namespace, userTableSubsystem, "total_vacuum_time"), + "Time spent vacuuming this table, in milliseconds (PostgreSQL 18+)", + []string{"datname", "schemaname", "relname"}, + prometheus.Labels{}, + ) + statUserTablesTotalAutovacuumTime = prometheus.NewDesc( + prometheus.BuildFQName(namespace, userTableSubsystem, "total_autovacuum_time"), + "Time spent auto-vacuuming this table, in milliseconds (PostgreSQL 18+)", + []string{"datname", "schemaname", "relname"}, + prometheus.Labels{}, + ) + statUserTablesTotalAnalyzeTime = prometheus.NewDesc( + prometheus.BuildFQName(namespace, userTableSubsystem, "total_analyze_time"), + "Time spent analyzing this table, in milliseconds (PostgreSQL 18+)", + []string{"datname", "schemaname", "relname"}, + prometheus.Labels{}, + ) + statUserTablesTotalAutoanalyzeTime = prometheus.NewDesc( + prometheus.BuildFQName(namespace, userTableSubsystem, "total_autoanalyze_time"), + "Time spent auto-analyzing this table, in milliseconds (PostgreSQL 18+)", + []string{"datname", "schemaname", "relname"}, + prometheus.Labels{}, + ) + + statUserTablesQueryPre18 = `SELECT + current_database() datname, + schemaname, + relname, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + n_mod_since_analyze, + COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, + COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, + COALESCE(last_analyze, '1970-01-01Z') as last_analyze, + COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + pg_total_relation_size(relid) as total_size, + NULL::double precision as total_vacuum_time, + NULL::double precision as total_autovacuum_time, + NULL::double precision as 
total_analyze_time, + NULL::double precision as total_autoanalyze_time + FROM + pg_stat_user_tables` - statUserTablesQuery = `SELECT + statUserTablesQuery18Plus = `SELECT current_database() datname, schemaname, relname, @@ -180,15 +236,25 @@ var ( autovacuum_count, analyze_count, autoanalyze_count, - pg_total_relation_size(relid) as total_size + pg_total_relation_size(relid) as total_size, + total_vacuum_time, + total_autovacuum_time, + total_analyze_time, + total_autoanalyze_time FROM pg_stat_user_tables` ) func (c *PGStatUserTablesCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { db := instance.getDB() - rows, err := db.QueryContext(ctx, - statUserTablesQuery) + + // Use version-specific query for PostgreSQL 18+ + query := statUserTablesQueryPre18 + if instance.version.GTE(semver.Version{Major: 18}) { + query = statUserTablesQuery18Plus + } + + rows, err := db.QueryContext(ctx, query) if err != nil { return err @@ -200,8 +266,9 @@ func (c *PGStatUserTablesCollector) Update(ctx context.Context, instance *instan var seqScan, seqTupRead, idxScan, idxTupFetch, nTupIns, nTupUpd, nTupDel, nTupHotUpd, nLiveTup, nDeadTup, nModSinceAnalyze, vacuumCount, autovacuumCount, analyzeCount, autoanalyzeCount, totalSize sql.NullInt64 var lastVacuum, lastAutovacuum, lastAnalyze, lastAutoanalyze sql.NullTime + var totalVacuumTime, totalAutovacuumTime, totalAnalyzeTime, totalAutoanalyzeTime sql.NullFloat64 - if err := rows.Scan(&datname, &schemaname, &relname, &seqScan, &seqTupRead, &idxScan, &idxTupFetch, &nTupIns, &nTupUpd, &nTupDel, &nTupHotUpd, &nLiveTup, &nDeadTup, &nModSinceAnalyze, &lastVacuum, &lastAutovacuum, &lastAnalyze, &lastAutoanalyze, &vacuumCount, &autovacuumCount, &analyzeCount, &autoanalyzeCount, &totalSize); err != nil { + if err := rows.Scan(&datname, &schemaname, &relname, &seqScan, &seqTupRead, &idxScan, &idxTupFetch, &nTupIns, &nTupUpd, &nTupDel, &nTupHotUpd, &nLiveTup, &nDeadTup, &nModSinceAnalyze, &lastVacuum, 
&lastAutovacuum, &lastAnalyze, &lastAutoanalyze, &vacuumCount, &autovacuumCount, &analyzeCount, &autoanalyzeCount, &totalSize, &totalVacuumTime, &totalAutovacuumTime, &totalAnalyzeTime, &totalAutoanalyzeTime); err != nil { return err } @@ -437,6 +504,43 @@ func (c *PGStatUserTablesCollector) Update(ctx context.Context, instance *instan totalSizeMetric, datnameLabel, schemanameLabel, relnameLabel, ) + + // PostgreSQL 18+ vacuum/analyze timing metrics + if totalVacuumTime.Valid { + ch <- prometheus.MustNewConstMetric( + statUserTablesTotalVacuumTime, + prometheus.CounterValue, + totalVacuumTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) + } + + if totalAutovacuumTime.Valid { + ch <- prometheus.MustNewConstMetric( + statUserTablesTotalAutovacuumTime, + prometheus.CounterValue, + totalAutovacuumTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) + } + + if totalAnalyzeTime.Valid { + ch <- prometheus.MustNewConstMetric( + statUserTablesTotalAnalyzeTime, + prometheus.CounterValue, + totalAnalyzeTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) + } + + if totalAutoanalyzeTime.Valid { + ch <- prometheus.MustNewConstMetric( + statUserTablesTotalAutoanalyzeTime, + prometheus.CounterValue, + totalAutoanalyzeTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) + } } if err := rows.Err(); err != nil { diff --git a/collector/pg_stat_user_tables_test.go b/collector/pg_stat_user_tables_test.go index 5e82335c3..e9a6b9f1e 100644 --- a/collector/pg_stat_user_tables_test.go +++ b/collector/pg_stat_user_tables_test.go @@ -97,7 +97,7 @@ func TestPGStatUserTablesCollector(t *testing.T) { 13, 14, 15) - mock.ExpectQuery(sanitizeQuery(statUserTablesQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statUserTablesQueryPre18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { defer close(ch) @@ -198,7 +198,7 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { nil, nil, nil) - 
mock.ExpectQuery(sanitizeQuery(statUserTablesQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statUserTablesQueryPre18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { defer close(ch) diff --git a/queries-lr.yaml b/queries-lr.yaml index 64ffdf824..5b2268a04 100644 --- a/queries-lr.yaml +++ b/queries-lr.yaml @@ -22,7 +22,19 @@ pg_stat_user_tables: vacuum_count, autovacuum_count, analyze_count, - autoanalyze_count + autoanalyze_count, + CASE WHEN current_setting('server_version_num')::int >= 180000 + THEN COALESCE(total_vacuum_time, 0) + ELSE 0 END as total_vacuum_time, + CASE WHEN current_setting('server_version_num')::int >= 180000 + THEN COALESCE(total_autovacuum_time, 0) + ELSE 0 END as total_autovacuum_time, + CASE WHEN current_setting('server_version_num')::int >= 180000 + THEN COALESCE(total_analyze_time, 0) + ELSE 0 END as total_analyze_time, + CASE WHEN current_setting('server_version_num')::int >= 180000 + THEN COALESCE(total_autoanalyze_time, 0) + ELSE 0 END as total_autoanalyze_time FROM pg_stat_user_tables metrics: @@ -92,6 +104,18 @@ pg_stat_user_tables: - autoanalyze_count: usage: "COUNTER" description: "Number of times this table has been analyzed by the autovacuum daemon" + - total_vacuum_time: + usage: "COUNTER" + description: "Time spent vacuuming this table, in milliseconds (PostgreSQL 18+)" + - total_autovacuum_time: + usage: "COUNTER" + description: "Time spent auto-vacuuming this table, in milliseconds (PostgreSQL 18+)" + - total_analyze_time: + usage: "COUNTER" + description: "Time spent analyzing this table, in milliseconds (PostgreSQL 18+)" + - total_autoanalyze_time: + usage: "COUNTER" + description: "Time spent auto-analyzing this table, in milliseconds (PostgreSQL 18+)" pg_statio_user_tables: query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables" 
diff --git a/queries-mr.yaml b/queries-mr.yaml index 700e74b65..d950d1554 100644 --- a/queries-mr.yaml +++ b/queries-mr.yaml @@ -26,6 +26,34 @@ pg_database: - size_bytes: usage: "GAUGE" description: "Disk space used by the database" + +# PostgreSQL 18+ enhanced database statistics +pg_stat_database_18: + query: | + SELECT + datname, + CASE WHEN current_setting('server_version_num')::int >= 180000 + THEN COALESCE(parallel_workers_to_launch, 0) + ELSE 0 END as parallel_workers_to_launch, + CASE WHEN current_setting('server_version_num')::int >= 180000 + THEN COALESCE(parallel_workers_launched, 0) + ELSE 0 END as parallel_workers_launched + FROM + pg_stat_database + WHERE + datname IS NOT NULL + master: true + cache_seconds: 30 + metrics: + - datname: + usage: "LABEL" + description: "Name of the database" + - parallel_workers_to_launch: + usage: "COUNTER" + description: "Number of parallel workers to launch (PostgreSQL 18+)" + - parallel_workers_launched: + usage: "COUNTER" + description: "Number of parallel workers launched (PostgreSQL 18+)" #### #pg_stat_statements: # query: "SELECT t2.rolname, t3.datname, queryid, calls, total_time / 1000 as total_time_seconds, min_time / 1000 as min_time_seconds, max_time / 1000 as max_time_seconds, mean_time / 1000 as mean_time_seconds, stddev_time / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 JOIN pg_roles t2 ON (t1.userid=t2.oid) JOIN pg_database t3 ON (t1.dbid=t3.oid) WHERE t2.rolname != 'rdsadmin'" From 65e13bbf98bbd23f110c736d0bc79406125f7bcb Mon Sep 17 00:00:00 2001 From: Nurlan Moldomurov Date: Fri, 13 Jun 2025 14:42:26 +0300 Subject: [PATCH 02/14] PMM-7 Update collector tests and queries for PostgreSQL 18 compatibility Refactored 
IO and statistics queries to accommodate PostgreSQL 18 features, including byte-level statistics and WAL I/O. Updated tests to validate new columns and ensure query expectation checks are correctly enforced. Improved test coverage across different PostgreSQL versions. --- collector/pg_stat_database_test.go | 40 +++++++++++++++++++++++---- collector/pg_stat_io.go | 8 +++--- collector/pg_stat_io_test.go | 18 ++++++------ collector/pg_stat_user_tables_test.go | 28 ++++++++++++++++--- 4 files changed, 72 insertions(+), 22 deletions(-) diff --git a/collector/pg_stat_database_test.go b/collector/pg_stat_database_test.go index b9d4f1b83..90d442a5a 100644 --- a/collector/pg_stat_database_test.go +++ b/collector/pg_stat_database_test.go @@ -53,6 +53,8 @@ func TestPGStatDatabaseCollector(t *testing.T) { "blk_read_time", "blk_write_time", "stats_reset", + "parallel_workers_to_launch", + "parallel_workers_launched", } srT, err := time.Parse("2006-01-02 15:04:05.00000-07", "2023-05-25 17:10:42.81132-07") @@ -80,7 +82,10 @@ func TestPGStatDatabaseCollector(t *testing.T) { 925, 16, 823, - srT) + srT, + nil, + nil, + ) mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) @@ -160,6 +165,8 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { "blk_read_time", "blk_write_time", "stats_reset", + "parallel_workers_to_launch", + "parallel_workers_launched", } rows := sqlmock.NewRows(columns). @@ -182,7 +189,10 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { 925, 16, 823, - srT). + srT, + nil, + nil, + ). 
AddRow( "pid", "postgres", @@ -202,7 +212,10 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { 925, 16, 823, - srT) + srT, + nil, + nil, + ) mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) @@ -276,6 +289,8 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { "blk_read_time", "blk_write_time", "stats_reset", + "parallel_workers_to_launch", + "parallel_workers_launched", } srT, err := time.Parse("2006-01-02 15:04:05.00000-07", "2023-05-25 17:10:42.81132-07") @@ -303,7 +318,10 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { 925, 16, 823, - srT). + srT, + nil, + nil, + ). AddRow( nil, nil, @@ -324,6 +342,8 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { nil, nil, nil, + nil, + nil, ). AddRow( "pid", @@ -344,7 +364,10 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { 926, 17, 824, - srT) + srT, + nil, + nil, + ) mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) @@ -436,6 +459,8 @@ func TestPGStatDatabaseCollectorTestNilStatReset(t *testing.T) { "blk_read_time", "blk_write_time", "stats_reset", + "parallel_workers_to_launch", + "parallel_workers_launched", } rows := sqlmock.NewRows(columns). 
@@ -458,7 +483,10 @@ func TestPGStatDatabaseCollectorTestNilStatReset(t *testing.T) { 925, 16, 823, - nil) + nil, + nil, + nil, + ) mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) diff --git a/collector/pg_stat_io.go b/collector/pg_stat_io.go index 6b2f1dec5..70fe09bae 100644 --- a/collector/pg_stat_io.go +++ b/collector/pg_stat_io.go @@ -87,7 +87,7 @@ var ( ) // PostgreSQL 18+ query with byte statistics and WAL I/O - statIOQuery18Plus = ` + StatIOQuery18Plus = ` SELECT backend_type, io_context, @@ -104,7 +104,7 @@ var ( ` // Pre-PostgreSQL 18 query without byte statistics - statIOQueryPre18 = ` + StatIOQueryPre18 = ` SELECT backend_type, io_context, @@ -130,9 +130,9 @@ func (c *PGStatIOCollector) Update(ctx context.Context, instance *instance, ch c db := instance.getDB() // Use version-specific query for PostgreSQL 18+ - query := statIOQueryPre18 + query := StatIOQueryPre18 if instance.version.GTE(semver.Version{Major: 18}) { - query = statIOQuery18Plus + query = StatIOQuery18Plus } rows, err := db.QueryContext(ctx, query) diff --git a/collector/pg_stat_io_test.go b/collector/pg_stat_io_test.go index 47233fc91..d478ab8cb 100644 --- a/collector/pg_stat_io_test.go +++ b/collector/pg_stat_io_test.go @@ -34,7 +34,7 @@ func TestPGStatIOCollector(t *testing.T) { columns := []string{"backend_type", "io_context", "io_object", "reads", "read_time", "writes", "write_time", "extends", "read_bytes", "write_bytes", "extend_bytes"} rows := sqlmock.NewRows(columns). 
AddRow("client backend", "normal", "relation", 100, 50.5, 75, 25.2, 10, nil, nil, nil) - mock.ExpectQuery("SELECT.*FROM pg_stat_io").WillReturnRows(rows) + mock.ExpectQuery("SELECT.*backend_type.*FROM pg_stat_io").WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -47,9 +47,6 @@ func TestPGStatIOCollector(t *testing.T) { }() expected := 5 // reads, read_time, writes, write_time, extends (no byte metrics for v16) - if err := mock.ExpectationsWereMet(); err != nil { - t.Errorf("There were unfulfilled expectations: %s", err) - } metricCount := 0 for m := range ch { @@ -60,6 +57,10 @@ func TestPGStatIOCollector(t *testing.T) { if metricCount != expected { t.Errorf("Expected %d metrics, got %d", expected, metricCount) } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("There were unfulfilled expectations: %s", err) + } } func TestPGStatIOCollectorPostgreSQL18(t *testing.T) { @@ -74,7 +75,7 @@ func TestPGStatIOCollectorPostgreSQL18(t *testing.T) { columns := []string{"backend_type", "io_context", "io_object", "reads", "read_time", "writes", "write_time", "extends", "read_bytes", "write_bytes", "extend_bytes"} rows := sqlmock.NewRows(columns). 
AddRow("client backend", "normal", "relation", 100, 50.5, 75, 25.2, 10, 1024, 2048, 512) - mock.ExpectQuery("SELECT.*FROM pg_stat_io").WillReturnRows(rows) + mock.ExpectQuery("SELECT.*backend_type.*FROM pg_stat_io").WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -87,9 +88,6 @@ func TestPGStatIOCollectorPostgreSQL18(t *testing.T) { }() expected := 8 // reads, read_time, writes, write_time, extends, read_bytes, write_bytes, extend_bytes - if err := mock.ExpectationsWereMet(); err != nil { - t.Errorf("There were unfulfilled expectations: %s", err) - } metricCount := 0 for m := range ch { @@ -100,6 +98,10 @@ func TestPGStatIOCollectorPostgreSQL18(t *testing.T) { if metricCount != expected { t.Errorf("Expected %d metrics, got %d", expected, metricCount) } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("There were unfulfilled expectations: %s", err) + } } func TestPGStatIOCollectorPrePostgreSQL16(t *testing.T) { diff --git a/collector/pg_stat_user_tables_test.go b/collector/pg_stat_user_tables_test.go index e9a6b9f1e..338b24cc3 100644 --- a/collector/pg_stat_user_tables_test.go +++ b/collector/pg_stat_user_tables_test.go @@ -72,7 +72,12 @@ func TestPGStatUserTablesCollector(t *testing.T) { "autovacuum_count", "analyze_count", "autoanalyze_count", - "total_size"} + "total_size", + "total_vacuum_time", + "total_autovacuum_time", + "total_analyze_time", + "total_autoanalyze_time", + } rows := sqlmock.NewRows(columns). 
AddRow("postgres", "public", @@ -96,7 +101,12 @@ func TestPGStatUserTablesCollector(t *testing.T) { 12, 13, 14, - 15) + 15, + nil, + nil, + nil, + nil, + ) mock.ExpectQuery(sanitizeQuery(statUserTablesQueryPre18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -173,7 +183,12 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { "autovacuum_count", "analyze_count", "autoanalyze_count", - "total_size"} + "total_size", + "total_vacuum_time", + "total_autovacuum_time", + "total_analyze_time", + "total_autoanalyze_time", + } rows := sqlmock.NewRows(columns). AddRow("postgres", nil, @@ -197,7 +212,12 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { nil, nil, nil, - nil) + nil, + nil, + nil, + nil, + nil, + ) mock.ExpectQuery(sanitizeQuery(statUserTablesQueryPre18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { From 9298502d3f9d44b6bff6251b349f44ec8db4cd21 Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Mon, 29 Sep 2025 17:56:35 +0200 Subject: [PATCH 03/14] Update CI matrix --- .github/workflows/go.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 17e4494f3..969abeda2 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -14,11 +14,12 @@ jobs: strategy: matrix: postgresql-image: - - postgres:10 - - postgres:11 - - postgres:12 - postgres:13 - postgres:14 + - postgres:15 + - postgres:16 + - postgres:17 + - postgres:18 runs-on: ubuntu-latest steps: - name: Checkout code From 94027f30cd51bc1abe51c4e59667b38b57898a71 Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Thu, 2 Oct 2025 18:15:11 +0200 Subject: [PATCH 04/14] Refactor pg_stat_database collector --- collector/pg_stat_database.go | 28 +++++++++++------ collector/pg_stat_database_test.go | 49 +++++++++++++++++++----------- 2 files changed, 50 insertions(+), 27 deletions(-) diff --git a/collector/pg_stat_database.go 
b/collector/pg_stat_database.go index f8a3aeef5..1afc93ed4 100644 --- a/collector/pg_stat_database.go +++ b/collector/pg_stat_database.go @@ -226,7 +226,7 @@ var ( prometheus.Labels{}, ) - statDatabaseQueryPre18 = ` + statDatabaseQueryPrePG18 = ` SELECT datid ,datname @@ -252,7 +252,7 @@ var ( FROM pg_stat_database; ` - statDatabaseQuery18Plus = ` + statDatabaseQueryPG18 = ` SELECT datid ,datname @@ -282,10 +282,11 @@ var ( func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { db := instance.getDB() + after18 := instance.version.GTE(semver.Version{Major: 18}) // Use version-specific query for PostgreSQL 18+ - query := statDatabaseQueryPre18 - if instance.version.GTE(semver.Version{Major: 18}) { - query = statDatabaseQuery18Plus + query := statDatabaseQueryPrePG18 + if after18 { + query = statDatabaseQueryPG18 } rows, err := db.QueryContext(ctx, query) @@ -408,6 +409,18 @@ func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance statsResetMetric = float64(statsReset.Time.Unix()) } + if after18 { + if !parallelWorkersToLaunch.Valid && after18 { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no parallel_workers_to_launch") + continue + } + + if !parallelWorkersLaunched.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no parallel_workers_launched") + continue + } + } + labels := []string{datid.String, datname.String} ch <- prometheus.MustNewConstMetric( @@ -529,17 +542,14 @@ func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance labels..., ) - // PostgreSQL 18+ parallel worker metrics - if parallelWorkersToLaunch.Valid { + if after18 { ch <- prometheus.MustNewConstMetric( statDatabaseParallelWorkersToLaunch, prometheus.CounterValue, parallelWorkersToLaunch.Float64, labels..., ) - } - if parallelWorkersLaunched.Valid { ch <- prometheus.MustNewConstMetric( statDatabaseParallelWorkersLaunched, 
prometheus.CounterValue, diff --git a/collector/pg_stat_database_test.go b/collector/pg_stat_database_test.go index 90d442a5a..cb82814c2 100644 --- a/collector/pg_stat_database_test.go +++ b/collector/pg_stat_database_test.go @@ -18,12 +18,15 @@ import ( "time" "github.com/DATA-DOG/go-sqlmock" + "github.com/blang/semver/v4" "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/smartystreets/goconvey/convey" ) +var pg18 = semver.MustParse("18.0.0") + func TestPGStatDatabaseCollector(t *testing.T) { db, mock, err := sqlmock.New() if err != nil { @@ -31,7 +34,7 @@ func TestPGStatDatabaseCollector(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: pg18} columns := []string{ "datid", @@ -83,11 +86,11 @@ func TestPGStatDatabaseCollector(t *testing.T) { 16, 823, srT, - nil, - nil, + 3, + 2, ) - mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPG18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -119,6 +122,8 @@ func TestPGStatDatabaseCollector(t *testing.T) { {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 16}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 823}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1685059842}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 3}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 2}, } convey.Convey("Metrics comparison", t, func() { @@ -143,7 +148,7 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { if err != nil { t.Fatalf("Error parsing time: %s", err) } - inst := &instance{db: db} + inst := &instance{db: db, version: pg18} 
columns := []string{ "datid", @@ -213,10 +218,10 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { 16, 823, srT, - nil, - nil, + 3, + 2, ) - mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPG18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -248,6 +253,8 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 16}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 823}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1685059842}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 3}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 2}, } convey.Convey("Metrics comparison", t, func() { @@ -267,7 +274,7 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: pg18} columns := []string{ "datid", @@ -319,8 +326,8 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { 16, 823, srT, - nil, - nil, + 5, + 4, ). 
AddRow( nil, @@ -365,10 +372,10 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { 17, 824, srT, - nil, - nil, + 3, + 2, ) - mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPG18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -400,6 +407,8 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 16}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 823}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1685059842}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 4}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_GAUGE, value: 355}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 4946}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 289097745}, @@ -417,6 +426,8 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 17}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 824}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1685059842}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 3}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 2}, } convey.Convey("Metrics comparison", t, func() { @@ -437,7 +448,7 @@ func 
TestPGStatDatabaseCollectorTestNilStatReset(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: pg18} columns := []string{ "datid", @@ -484,11 +495,11 @@ func TestPGStatDatabaseCollectorTestNilStatReset(t *testing.T) { 16, 823, nil, - nil, - nil, + 3, + 2, ) - mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPre18)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statDatabaseQueryPG18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -520,6 +531,8 @@ func TestPGStatDatabaseCollectorTestNilStatReset(t *testing.T) { {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 16}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 823}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 3}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 2}, } convey.Convey("Metrics comparison", t, func() { From ed7f09f08d5be6c54e3f17b1b6227530984c5ca8 Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Fri, 3 Oct 2025 19:00:43 +0200 Subject: [PATCH 05/14] Fix pg_stat_bgwriter collector --- collector/pg_stat_bgwriter.go | 121 ++++++++++++++++++++++------------ 1 file changed, 79 insertions(+), 42 deletions(-) diff --git a/collector/pg_stat_bgwriter.go b/collector/pg_stat_bgwriter.go index 3606aa8c1..3fac6ee0f 100644 --- a/collector/pg_stat_bgwriter.go +++ b/collector/pg_stat_bgwriter.go @@ -47,6 +47,12 @@ var ( []string{"collector", "server"}, prometheus.Labels{}, ) + statBGWriterCheckpointsDoneDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoints_done_total"), + "Number of completed checkpoints", + []string{"collector", "server"}, + prometheus.Labels{}, + ) 
statBGWriterCheckpointsReqTimeDesc = prometheus.NewDesc( prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoint_write_time_total"), "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds", @@ -95,21 +101,15 @@ var ( []string{"collector", "server"}, prometheus.Labels{}, ) - statBGWriterStatsResetDesc = prometheus.NewDesc( - prometheus.BuildFQName(namespace, bgWriterSubsystem, "stats_reset_total"), - "Time at which these statistics were last reset", - []string{"collector", "server"}, - prometheus.Labels{}, - ) - statCheckpointerNumDoneDesc = prometheus.NewDesc( - prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoints_done_total"), - "Number of completed checkpoints (PostgreSQL 18+)", + statBGWriterCheckpointsSlruWrittenDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, bgWriterSubsystem, "slru_written_total"), + "Number of SLRU buffers written during checkpoints and restartpoints", []string{"collector", "server"}, prometheus.Labels{}, ) - statCheckpointerSlruWrittenDesc = prometheus.NewDesc( - prometheus.BuildFQName(namespace, bgWriterSubsystem, "slru_written_total"), - "Number of SLRU buffers written during checkpoints (PostgreSQL 18+)", + statBGWriterStatsResetDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, bgWriterSubsystem, "stats_reset_total"), + "Time at which these statistics were last reset", []string{"collector", "server"}, prometheus.Labels{}, ) @@ -127,6 +127,12 @@ var statBGWriter = map[string]*prometheus.Desc{ []string{"collector", "server"}, prometheus.Labels{}, ), + "percona_checkpoints_done": prometheus.NewDesc( + prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoints_done"), + "Number of completed checkpoints", + []string{"collector", "server"}, + prometheus.Labels{}, + ), "percona_checkpoint_write_time": prometheus.NewDesc( prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoint_write_time"), "Total amount 
of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds", @@ -175,6 +181,12 @@ var statBGWriter = map[string]*prometheus.Desc{ []string{"collector", "server"}, prometheus.Labels{}, ), + "percona_slru_written": prometheus.NewDesc( + prometheus.BuildFQName(namespace, bgWriterSubsystem, "slru_written"), + "Number of SLRU buffers written during checkpoints and restartpoints", + []string{"collector", "server"}, + prometheus.Labels{}, + ), "percona_stats_reset": prometheus.NewDesc( prometheus.BuildFQName(namespace, bgWriterSubsystem, "stats_reset"), "Time at which these statistics were last reset", @@ -207,38 +219,40 @@ const statBGWriterQueryPost17 = `SELECT const statCheckpointerQueryPre18 = `SELECT num_timed ,num_requested + ,NULL::bigint as num_done ,restartpoints_timed ,restartpoints_req ,restartpoints_done ,write_time ,sync_time ,buffers_written - ,stats_reset - ,NULL::bigint as num_done ,NULL::bigint as slru_written + ,stats_reset FROM pg_stat_checkpointer;` const statCheckpointerQuery18Plus = `SELECT num_timed ,num_requested + ,num_done ,restartpoints_timed ,restartpoints_req ,restartpoints_done ,write_time ,sync_time ,buffers_written - ,stats_reset - ,num_done ,slru_written + ,stats_reset FROM pg_stat_checkpointer;` func (p PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { db := instance.getDB() - var cpt, cpr, bcp, bc, mwc, bb, bbf, ba, numDone, slruWritten sql.NullInt64 + var cpt, cpr, cpd, bcp, bc, mwc, bb, bbf, ba, slruw sql.NullInt64 var cpwt, cpst sql.NullFloat64 var sr sql.NullTime + after18 := instance.version.GTE(semver.Version{Major: 18}) + if instance.version.GE(semver.MustParse("17.0.0")) { row := db.QueryRowContext(ctx, statBGWriterQueryPost17) @@ -251,12 +265,12 @@ func (p PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, // Use version-specific checkpointer query for PostgreSQL 18+ checkpointerQuery := 
statCheckpointerQueryPre18 - if instance.version.GTE(semver.Version{Major: 18}) { + if after18 { checkpointerQuery = statCheckpointerQuery18Plus } row = db.QueryRowContext(ctx, checkpointerQuery) - err = row.Scan(&cpt, &cpr, &rpt, &rpr, &rpd, &cpwt, &cpst, &bcp, &csr, &numDone, &slruWritten) + err = row.Scan(&cpt, &cpr, &cpd, &rpt, &rpr, &rpd, &cpwt, &cpst, &bcp, &slruw, &csr) if err != nil { return err } @@ -291,6 +305,21 @@ func (p PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, "exporter", instance.name, ) + + cpdMetric := 0.0 + if after18 { + if cpd.Valid { + cpdMetric = float64(cpd.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterCheckpointsDoneDesc, + prometheus.CounterValue, + cpdMetric, + "exporter", + instance.name, + ) + } + cpwtMetric := 0.0 if cpwt.Valid { cpwtMetric = float64(cpwt.Float64) @@ -379,6 +408,19 @@ func (p PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, "exporter", instance.name, ) + slruwMetric := 0.0 + if after18 { + if slruw.Valid { + slruwMetric = float64(slruw.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterCheckpointsSlruWrittenDesc, + prometheus.CounterValue, + slruwMetric, + "exporter", + instance.name, + ) + } srMetric := 0.0 if sr.Valid { srMetric = float64(sr.Time.Unix()) @@ -391,29 +433,6 @@ func (p PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, instance.name, ) - // PostgreSQL 18+ checkpointer metrics - if numDone.Valid { - numDoneMetric := float64(numDone.Int64) - ch <- prometheus.MustNewConstMetric( - statCheckpointerNumDoneDesc, - prometheus.CounterValue, - numDoneMetric, - "exporter", - instance.name, - ) - } - - if slruWritten.Valid { - slruWrittenMetric := float64(slruWritten.Int64) - ch <- prometheus.MustNewConstMetric( - statCheckpointerSlruWrittenDesc, - prometheus.CounterValue, - slruWrittenMetric, - "exporter", - instance.name, - ) - } - // TODO: analyze metrics below, why do we duplicate them? 
ch <- prometheus.MustNewConstMetric( @@ -430,6 +449,15 @@ func (p PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, "exporter", instance.name, ) + if after18 { + ch <- prometheus.MustNewConstMetric( + statBGWriter["percona_checkpoints_done"], + prometheus.CounterValue, + cpdMetric, + "exporter", + instance.name, + ) + } ch <- prometheus.MustNewConstMetric( statBGWriter["percona_checkpoint_write_time"], prometheus.CounterValue, @@ -486,6 +514,15 @@ func (p PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, "exporter", instance.name, ) + if after18 { + ch <- prometheus.MustNewConstMetric( + statBGWriter["percona_slru_written"], + prometheus.CounterValue, + slruwMetric, + "exporter", + instance.name, + ) + } ch <- prometheus.MustNewConstMetric( statBGWriter["percona_stats_reset"], prometheus.CounterValue, From 2b5a597e1ba8332d1a8166d6266549e337807127 Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Tue, 7 Oct 2025 18:55:35 +0200 Subject: [PATCH 06/14] Fix pg_stat_user_tables collector --- collector/pg_stat_user_tables.go | 75 +++++++++++++-------------- collector/pg_stat_user_tables_test.go | 26 +++++++--- 2 files changed, 53 insertions(+), 48 deletions(-) diff --git a/collector/pg_stat_user_tables.go b/collector/pg_stat_user_tables.go index 9e75ec9b9..74e32b8d0 100644 --- a/collector/pg_stat_user_tables.go +++ b/collector/pg_stat_user_tables.go @@ -182,7 +182,7 @@ var ( prometheus.Labels{}, ) - statUserTablesQueryPre18 = `SELECT + statUserTablesQueryPrePG18 = `SELECT current_database() datname, schemaname, relname, @@ -213,7 +213,7 @@ var ( FROM pg_stat_user_tables` - statUserTablesQuery18Plus = `SELECT + statUserTablesQueryPG18 = `SELECT current_database() datname, schemaname, relname, @@ -248,10 +248,11 @@ var ( func (c *PGStatUserTablesCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { db := instance.getDB() + after18 := instance.version.GTE(semver.Version{Major: 18}) // 
Use version-specific query for PostgreSQL 18+ - query := statUserTablesQueryPre18 - if instance.version.GTE(semver.Version{Major: 18}) { - query = statUserTablesQuery18Plus + query := statUserTablesQueryPrePG18 + if after18 { + query = statUserTablesQueryPG18 } rows, err := db.QueryContext(ctx, query) @@ -505,41 +506,35 @@ func (c *PGStatUserTablesCollector) Update(ctx context.Context, instance *instan datnameLabel, schemanameLabel, relnameLabel, ) - // PostgreSQL 18+ vacuum/analyze timing metrics - if totalVacuumTime.Valid { - ch <- prometheus.MustNewConstMetric( - statUserTablesTotalVacuumTime, - prometheus.CounterValue, - totalVacuumTime.Float64, - datnameLabel, schemanameLabel, relnameLabel, - ) - } - - if totalAutovacuumTime.Valid { - ch <- prometheus.MustNewConstMetric( - statUserTablesTotalAutovacuumTime, - prometheus.CounterValue, - totalAutovacuumTime.Float64, - datnameLabel, schemanameLabel, relnameLabel, - ) - } - - if totalAnalyzeTime.Valid { - ch <- prometheus.MustNewConstMetric( - statUserTablesTotalAnalyzeTime, - prometheus.CounterValue, - totalAnalyzeTime.Float64, - datnameLabel, schemanameLabel, relnameLabel, - ) - } - - if totalAutoanalyzeTime.Valid { - ch <- prometheus.MustNewConstMetric( - statUserTablesTotalAutoanalyzeTime, - prometheus.CounterValue, - totalAutoanalyzeTime.Float64, - datnameLabel, schemanameLabel, relnameLabel, - ) + if after18 { + // PostgreSQL 18+ vacuum/analyze timing metrics + ch <- prometheus.MustNewConstMetric( + statUserTablesTotalVacuumTime, + prometheus.CounterValue, + totalVacuumTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) + + ch <- prometheus.MustNewConstMetric( + statUserTablesTotalAutovacuumTime, + prometheus.CounterValue, + totalAutovacuumTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) + + ch <- prometheus.MustNewConstMetric( + statUserTablesTotalAnalyzeTime, + prometheus.CounterValue, + totalAnalyzeTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) + + ch <- 
prometheus.MustNewConstMetric( + statUserTablesTotalAutoanalyzeTime, + prometheus.CounterValue, + totalAutoanalyzeTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) } } diff --git a/collector/pg_stat_user_tables_test.go b/collector/pg_stat_user_tables_test.go index 338b24cc3..cc84469cb 100644 --- a/collector/pg_stat_user_tables_test.go +++ b/collector/pg_stat_user_tables_test.go @@ -30,7 +30,7 @@ func TestPGStatUserTablesCollector(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: pg18} lastVacuumTime, err := time.Parse("2006-01-02Z", "2023-06-02Z") if err != nil { @@ -102,12 +102,12 @@ func TestPGStatUserTablesCollector(t *testing.T) { 13, 14, 15, - nil, - nil, - nil, - nil, + 16, + 17, + 18, + 19, ) - mock.ExpectQuery(sanitizeQuery(statUserTablesQueryPre18)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statUserTablesQueryPG18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { defer close(ch) @@ -138,6 +138,11 @@ func TestPGStatUserTablesCollector(t *testing.T) { {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 12}, {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 13}, {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 14}, + {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 15}, + {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 16}, + {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 17}, + {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: 
dto.MetricType_COUNTER, value: 18}, + {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 19}, } convey.Convey("Metrics comparison", t, func() { @@ -158,7 +163,7 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: pg18} columns := []string{ "datname", @@ -218,7 +223,7 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { nil, nil, ) - mock.ExpectQuery(sanitizeQuery(statUserTablesQueryPre18)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statUserTablesQueryPG18)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { defer close(ch) @@ -249,6 +254,11 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, } convey.Convey("Metrics comparison", t, func() { From 
bf374db0371020868877d5818bf2fe36c2460227 Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Thu, 9 Oct 2025 18:39:50 +0200 Subject: [PATCH 07/14] Fix pg_stat_io collector --- collector/pg_stat_io.go | 285 ++++++++++++++++++++++++++++------- collector/pg_stat_io_test.go | 91 ++++++----- 2 files changed, 285 insertions(+), 91 deletions(-) diff --git a/collector/pg_stat_io.go b/collector/pg_stat_io.go index 70fe09bae..053826926 100644 --- a/collector/pg_stat_io.go +++ b/collector/pg_stat_io.go @@ -39,49 +39,97 @@ func NewPGStatIOCollector(config collectorConfig) (Collector, error) { var ( statIOReads = prometheus.NewDesc( prometheus.BuildFQName(namespace, statIOSubsystem, "reads_total"), - "Number of reads", + "Number of read operations", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOReadBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "read_bytes_total"), + "The total size of read operations, in bytes", []string{"backend_type", "io_context", "io_object"}, prometheus.Labels{}, ) statIOReadTime = prometheus.NewDesc( - prometheus.BuildFQName(namespace, statIOSubsystem, "read_time_seconds_total"), - "Time spent reading", + prometheus.BuildFQName(namespace, statIOSubsystem, "read_time_total"), + "Time spent waiting for read operations, in milliseconds", []string{"backend_type", "io_context", "io_object"}, prometheus.Labels{}, ) statIOWrites = prometheus.NewDesc( prometheus.BuildFQName(namespace, statIOSubsystem, "writes_total"), - "Number of writes", + "Number of write operations", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOWriteBytes = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "write_bytes_total"), + "The total size of write operations, in bytes", []string{"backend_type", "io_context", "io_object"}, prometheus.Labels{}, ) statIOWriteTime = prometheus.NewDesc( - prometheus.BuildFQName(namespace, statIOSubsystem, 
"write_time_seconds_total"), - "Time spent writing", + prometheus.BuildFQName(namespace, statIOSubsystem, "write_time_total"), + "Time spent waiting for write operations, in milliseconds", []string{"backend_type", "io_context", "io_object"}, prometheus.Labels{}, ) - statIOExtends = prometheus.NewDesc( - prometheus.BuildFQName(namespace, statIOSubsystem, "extends_total"), - "Number of extends", + statIOWritebacks = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "writebacks_total"), + "Number of units of size BLCKSZ (typically 8kB) which the process requested the kernel write out to permanent storage", []string{"backend_type", "io_context", "io_object"}, prometheus.Labels{}, ) - statIOReadBytes = prometheus.NewDesc( - prometheus.BuildFQName(namespace, statIOSubsystem, "read_bytes_total"), - "Number of bytes read (PostgreSQL 18+)", + statIOWritebackTime = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "writeback_time_total"), + "Time spent waiting for writeback operations, in milliseconds", []string{"backend_type", "io_context", "io_object"}, prometheus.Labels{}, ) - statIOWriteBytes = prometheus.NewDesc( - prometheus.BuildFQName(namespace, statIOSubsystem, "write_bytes_total"), - "Number of bytes written (PostgreSQL 18+)", + statIOExtends = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "extends_total"), + "Number of extend operations", []string{"backend_type", "io_context", "io_object"}, prometheus.Labels{}, ) statIOExtendBytes = prometheus.NewDesc( prometheus.BuildFQName(namespace, statIOSubsystem, "extend_bytes_total"), - "Number of bytes extended (PostgreSQL 18+)", + "The total size of extend operations, in bytes", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOExtendTime = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "extend_time_total"), + "Time spent waiting for extend operations, in milliseconds", + 
[]string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOHits = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "hits_total"), + "The number of times a desired block was found in a shared buffer", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOEvictions = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "evictions_total"), + "Number of times a block has been written out from a shared or local buffer in order to make it available for another use", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOReueses = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "reuses_total"), + "The number of times an existing buffer in a size-limited ring buffer outside of shared buffers was reused as part of an I/O operation in the bulkread, bulkwrite, or vacuum contexts", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOFsyncs = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "fsyncs_total"), + "Number of fsync calls", + []string{"backend_type", "io_context", "io_object"}, + prometheus.Labels{}, + ) + statIOFsyncTime = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statIOSubsystem, "fsync_time_total"), + "Time spent waiting for fsync operations, in milliseconds", []string{"backend_type", "io_context", "io_object"}, prometheus.Labels{}, ) @@ -90,16 +138,24 @@ var ( StatIOQuery18Plus = ` SELECT backend_type, - io_context, io_object, + io_context, reads, + read_bytes, read_time, writes, + write_bytes, write_time, + writebacks, + writeback_time, extends, - read_bytes, - write_bytes, - extend_bytes + extend_bytes, + extend_time, + hits, + evictions, + reuses, + fsyncs, + fsync_time FROM pg_stat_io ` @@ -107,16 +163,24 @@ var ( StatIOQueryPre18 = ` SELECT backend_type, - io_context, io_object, + io_context, reads, + NULL::bigint as 
read_bytes, read_time, writes, + NULL::bigint as write_bytes, write_time, + writebacks, + writeback_time, extends, - NULL::bigint as read_bytes, - NULL::bigint as write_bytes, - NULL::bigint as extend_bytes + NULL::numeric as extend_bytes, + extend_time, + hits, + evictions, + reuses, + fsyncs, + fsync_time FROM pg_stat_io ` ) @@ -129,9 +193,10 @@ func (c *PGStatIOCollector) Update(ctx context.Context, instance *instance, ch c db := instance.getDB() + after18 := instance.version.GTE(semver.Version{Major: 18}) // Use version-specific query for PostgreSQL 18+ query := StatIOQueryPre18 - if instance.version.GTE(semver.Version{Major: 18}) { + if after18 { query = StatIOQuery18Plus } @@ -143,21 +208,29 @@ func (c *PGStatIOCollector) Update(ctx context.Context, instance *instance, ch c for rows.Next() { var backendType, ioContext, ioObject sql.NullString - var reads, writes, extends, readBytes, writeBytes, extendBytes sql.NullFloat64 - var readTime, writeTime sql.NullFloat64 + var reads, writes, writebacks, extends, hits, evictions, reueses, fsyncs sql.NullInt64 + var readBytes, writeBytes, extendBytes, readTime, writeTime, extendTime, writebackTime, fsyncTime sql.NullFloat64 err := rows.Scan( &backendType, - &ioContext, &ioObject, + &ioContext, &reads, + &readBytes, &readTime, &writes, + &writeBytes, &writeTime, + &writebacks, + &writebackTime, &extends, - &readBytes, - &writeBytes, &extendBytes, + &extendTime, + &hits, + &evictions, + &reueses, + &fsyncs, + &fsyncTime, ) if err != nil { return err } @@ -167,89 +240,189 @@ func (c *PGStatIOCollector) Update(ctx context.Context, instance *instance, ch c if backendType.Valid { backendTypeLabel = backendType.String } - ioContextLabel := "unknown" - if ioContext.Valid { - ioContextLabel = ioContext.String - } ioObjectLabel := "unknown" if ioObject.Valid { ioObjectLabel = ioObject.String } + ioContextLabel := "unknown" + if ioContext.Valid { + ioContextLabel = ioContext.String + } labels := 
[]string{backendTypeLabel, ioContextLabel, ioObjectLabel} + readsMetric := 0.0 if reads.Valid { + readsMetric = float64(reads.Int64) + } + ch <- prometheus.MustNewConstMetric( + statIOReads, + prometheus.CounterValue, + readsMetric, + labels..., + ) + + if readTime.Valid { ch <- prometheus.MustNewConstMetric( - statIOReads, + statIOReadTime, prometheus.CounterValue, - reads.Float64, + readTime.Float64, labels..., ) } - if readTime.Valid { + writesMetric := 0.0 + if writes.Valid { + writesMetric = float64(writes.Int64) + } + ch <- prometheus.MustNewConstMetric( + statIOWrites, + prometheus.CounterValue, + writesMetric, + labels..., + ) + + if writeTime.Valid { ch <- prometheus.MustNewConstMetric( - statIOReadTime, + statIOWriteTime, prometheus.CounterValue, - readTime.Float64/1000.0, // Convert milliseconds to seconds + writeTime.Float64, labels..., ) } - if writes.Valid { + writebacksMetric := 0.0 + if writebacks.Valid { + writebacksMetric = float64(writebacks.Int64) + } + if writebacks.Valid { ch <- prometheus.MustNewConstMetric( - statIOWrites, + statIOWritebacks, prometheus.CounterValue, - writes.Float64, + writebacksMetric, labels..., ) } - if writeTime.Valid { + if writebackTime.Valid { ch <- prometheus.MustNewConstMetric( - statIOWriteTime, + statIOWritebackTime, prometheus.CounterValue, - writeTime.Float64/1000.0, // Convert milliseconds to seconds + writebackTime.Float64, labels..., ) } + extendsMetric := 0.0 + if extends.Valid { + extendsMetric = float64(extends.Int64) + } if extends.Valid { ch <- prometheus.MustNewConstMetric( statIOExtends, prometheus.CounterValue, - extends.Float64, + extendsMetric, labels..., ) } - // PostgreSQL 18+ byte statistics - if readBytes.Valid { + if extendTime.Valid { + ch <- prometheus.MustNewConstMetric( + statIOExtendTime, + prometheus.CounterValue, + extendTime.Float64, + labels..., + ) + } + + hitsMetric := 0.0 + if hits.Valid { + hitsMetric = float64(hits.Int64) + } + if hits.Valid { + ch <- 
prometheus.MustNewConstMetric( + statIOHits, + prometheus.CounterValue, + hitsMetric, + labels..., + ) + } + evictionsMetric := 0.0 + if evictions.Valid { + evictionsMetric = float64(evictions.Int64) + } + if evictions.Valid { + ch <- prometheus.MustNewConstMetric( + statIOEvictions, + prometheus.CounterValue, + evictionsMetric, + labels..., + ) + } + reuesesMetric := 0.0 + if reueses.Valid { + reuesesMetric = float64(reueses.Int64) + } + if reueses.Valid { ch <- prometheus.MustNewConstMetric( - statIOReadBytes, + statIOReueses, prometheus.CounterValue, - readBytes.Float64, + reuesesMetric, labels..., ) } - if writeBytes.Valid { + fsyncsMetric := 0.0 + if fsyncs.Valid { + fsyncsMetric = float64(fsyncs.Int64) + } + if fsyncs.Valid { ch <- prometheus.MustNewConstMetric( - statIOWriteBytes, + statIOFsyncs, prometheus.CounterValue, - writeBytes.Float64, + fsyncsMetric, labels..., ) } - if extendBytes.Valid { + if fsyncTime.Valid { ch <- prometheus.MustNewConstMetric( - statIOExtendBytes, + statIOFsyncTime, prometheus.CounterValue, - extendBytes.Float64, + fsyncTime.Float64, labels..., ) } + + // PostgreSQL 18+ byte statistics + if after18 { + if readBytes.Valid { + ch <- prometheus.MustNewConstMetric( + statIOReadBytes, + prometheus.CounterValue, + readBytes.Float64, + labels..., + ) + } + + if writeBytes.Valid { + ch <- prometheus.MustNewConstMetric( + statIOWriteBytes, + prometheus.CounterValue, + writeBytes.Float64, + labels..., + ) + } + + if extendBytes.Valid { + ch <- prometheus.MustNewConstMetric( + statIOExtendBytes, + prometheus.CounterValue, + extendBytes.Float64, + labels..., + ) + } + } } return nil diff --git a/collector/pg_stat_io_test.go b/collector/pg_stat_io_test.go index d478ab8cb..501f72829 100644 --- a/collector/pg_stat_io_test.go +++ b/collector/pg_stat_io_test.go @@ -20,6 +20,8 @@ import ( "github.com/DATA-DOG/go-sqlmock" "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + 
"github.com/smartystreets/goconvey/convey" ) func TestPGStatIOCollector(t *testing.T) { @@ -31,9 +33,9 @@ func TestPGStatIOCollector(t *testing.T) { inst := &instance{db: db, version: semver.MustParse("16.0.0")} - columns := []string{"backend_type", "io_context", "io_object", "reads", "read_time", "writes", "write_time", "extends", "read_bytes", "write_bytes", "extend_bytes"} + columns := []string{"backend_type", "io_object", "io_context", "reads", "read_bytes", "read_time", "writes", "write_bytes", "write_time", "writebacks", "writeback_time", "extends", "extend_bytes", "extend_time", "hits", "evictions", "reueses", "fsyncs", "fsync_time"} rows := sqlmock.NewRows(columns). - AddRow("client backend", "normal", "relation", 100, 50.5, 75, 25.2, 10, nil, nil, nil) + AddRow("client backend", "relation", "normal", 100, nil, 50.5, 75 ,nil, 25.2, 10, 12.0, 7, nil, 11.0, 1 ,2 ,3, 4, 8.0 ) mock.ExpectQuery("SELECT.*backend_type.*FROM pg_stat_io").WillReturnRows(rows) ch := make(chan prometheus.Metric) @@ -46,20 +48,31 @@ func TestPGStatIOCollector(t *testing.T) { } }() - expected := 5 // reads, read_time, writes, write_time, extends (no byte metrics for v16) - - metricCount := 0 - for m := range ch { - metricCount++ - _ = m - } - - if metricCount != expected { - t.Errorf("Expected %d metrics, got %d", expected, metricCount) + labels := labelMap{"backend_type": "client backend", "io_object": "relation", "io_context": "normal",} + expected := []MetricResult{ + {labels: labels, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 50.5}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 75}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 25.2}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 10}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 12.0}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 7}, + {labels: labels, metricType: 
dto.MetricType_COUNTER, value: 11.0}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 1}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 2}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 3}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 4}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 8.0}, } + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) if err := mock.ExpectationsWereMet(); err != nil { - t.Errorf("There were unfulfilled expectations: %s", err) + t.Errorf("there were unfulfilled exceptions: %s", err) } } @@ -70,11 +83,11 @@ func TestPGStatIOCollectorPostgreSQL18(t *testing.T) { } defer db.Close() - inst := &instance{db: db, version: semver.MustParse("18.0.0")} + inst := &instance{db: db, version: pg18} - columns := []string{"backend_type", "io_context", "io_object", "reads", "read_time", "writes", "write_time", "extends", "read_bytes", "write_bytes", "extend_bytes"} + columns := []string{"backend_type", "io_context", "io_object", "reads", "read_bytes", "read_time", "writes", "write_bytes", "write_time", "writebacks", "writeback_time", "extends", "extend_bytes", "extend_time", "hits", "evictions", "reueses", "fsyncs", "fsync_time"} rows := sqlmock.NewRows(columns). 
- AddRow("client backend", "normal", "relation", 100, 50.5, 75, 25.2, 10, 1024, 2048, 512) + AddRow("client backend", "relation", "normal", 100, 90, 50.5, 75, 80, 25.2, 10, 12.0, 7, 30, 11.0, 1, 2, 3, 4, 8.0) mock.ExpectQuery("SELECT.*backend_type.*FROM pg_stat_io").WillReturnRows(rows) ch := make(chan prometheus.Metric) @@ -87,20 +100,34 @@ func TestPGStatIOCollectorPostgreSQL18(t *testing.T) { } }() - expected := 8 // reads, read_time, writes, write_time, extends, read_bytes, write_bytes, extend_bytes - - metricCount := 0 - for m := range ch { - metricCount++ - _ = m - } - - if metricCount != expected { - t.Errorf("Expected %d metrics, got %d", expected, metricCount) + labels := labelMap{"backend_type": "client backend", "io_object": "relation", "io_context": "normal",} + expected := []MetricResult{ + {labels: labels, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 50.5}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 75}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 25.2}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 10}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 12.0}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 7}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 11.0}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 1}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 2}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 3}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 4}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 8.0}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 90}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 80}, + {labels: labels, metricType: dto.MetricType_COUNTER, value: 30}, } + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := 
readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) if err := mock.ExpectationsWereMet(); err != nil { - t.Errorf("There were unfulfilled expectations: %s", err) + t.Errorf("there were unfulfilled exceptions: %s", err) } } @@ -128,13 +155,7 @@ func TestPGStatIOCollectorPrePostgreSQL16(t *testing.T) { t.Errorf("There were unfulfilled expectations: %s", err) } - metricCount := 0 - for m := range ch { - metricCount++ - _ = m - } - - if metricCount != 0 { - t.Errorf("Expected 0 metrics for PostgreSQL < 16, got %d", metricCount) + for _ = range ch { + t.Error("Don't expect any metrics for PostgreSQL < 16") } } From 77e479a1455bd957d86d6aa3162202a7df166eda Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Thu, 9 Oct 2025 18:53:54 +0200 Subject: [PATCH 08/14] Cleanup --- collector/pg_backend_memory_contexts.go | 216 ----------------- collector/pg_backend_stats.go | 310 ------------------------ 2 files changed, 526 deletions(-) delete mode 100644 collector/pg_backend_memory_contexts.go delete mode 100644 collector/pg_backend_stats.go diff --git a/collector/pg_backend_memory_contexts.go b/collector/pg_backend_memory_contexts.go deleted file mode 100644 index 2146d69ba..000000000 --- a/collector/pg_backend_memory_contexts.go +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright 2024 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package collector - -import ( - "context" - "database/sql" - - "github.com/blang/semver/v4" - "github.com/go-kit/log" - "github.com/prometheus/client_golang/prometheus" -) - -const backendMemoryContextsSubsystem = "backend_memory_contexts" - -func init() { - registerCollector(backendMemoryContextsSubsystem, defaultDisabled, NewPGBackendMemoryContextsCollector) -} - -type PGBackendMemoryContextsCollector struct { - log log.Logger -} - -func NewPGBackendMemoryContextsCollector(config collectorConfig) (Collector, error) { - return &PGBackendMemoryContextsCollector{log: config.logger}, nil -} - -var ( - backendMemoryContextsTotalBytes = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendMemoryContextsSubsystem, "total_bytes"), - "Total bytes allocated for memory context", - []string{"pid", "name", "ident", "parent", "level", "type", "path"}, - prometheus.Labels{}, - ) - backendMemoryContextsUsedBytes = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendMemoryContextsSubsystem, "used_bytes"), - "Used bytes in memory context", - []string{"pid", "name", "ident", "parent", "level", "type", "path"}, - prometheus.Labels{}, - ) - backendMemoryContextsFreeBytes = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendMemoryContextsSubsystem, "free_bytes"), - "Free bytes in memory context", - []string{"pid", "name", "ident", "parent", "level", "type", "path"}, - prometheus.Labels{}, - ) - backendMemoryContextsFreeChunks = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendMemoryContextsSubsystem, "free_chunks"), - "Number of free chunks in memory context", - []string{"pid", "name", "ident", "parent", "level", "type", "path"}, - prometheus.Labels{}, - ) - - // PostgreSQL 18+ query with type and path columns - backendMemoryContextsQuery18Plus = ` - SELECT - pid, - name, - COALESCE(ident, '') as ident, - COALESCE(parent, '') as parent, - level, - total_bytes, - total_nblocks, - free_bytes, - free_chunks, - used_bytes, - type, - path - 
FROM pg_backend_memory_contexts - ORDER BY pid, name - ` - - // Pre-PostgreSQL 18 query without type and path columns - backendMemoryContextsQueryPre18 = ` - SELECT - pid, - name, - COALESCE(ident, '') as ident, - COALESCE(parent, '') as parent, - level, - total_bytes, - total_nblocks, - free_bytes, - free_chunks, - used_bytes, - '' as type, - '' as path - FROM pg_backend_memory_contexts - ORDER BY pid, name - ` -) - -func (c *PGBackendMemoryContextsCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { - // pg_backend_memory_contexts was introduced in PostgreSQL 14 - if instance.version.LT(semver.Version{Major: 14}) { - return nil - } - - db := instance.getDB() - - // Use version-specific query for PostgreSQL 18+ - query := backendMemoryContextsQueryPre18 - if instance.version.GTE(semver.Version{Major: 18}) { - query = backendMemoryContextsQuery18Plus - } - - rows, err := db.QueryContext(ctx, query) - if err != nil { - return err - } - defer rows.Close() - - for rows.Next() { - var pid, name, ident, parent, contextType, path sql.NullString - var level, totalNblocks, freeChunks sql.NullInt64 - var totalBytes, freeBytes, usedBytes sql.NullFloat64 - - err := rows.Scan( - &pid, - &name, - &ident, - &parent, - &level, - &totalBytes, - &totalNblocks, - &freeBytes, - &freeChunks, - &usedBytes, - &contextType, - &path, - ) - if err != nil { - return err - } - - pidLabel := "unknown" - if pid.Valid { - pidLabel = pid.String - } - nameLabel := "unknown" - if name.Valid { - nameLabel = name.String - } - identLabel := "" - if ident.Valid { - identLabel = ident.String - } - parentLabel := "" - if parent.Valid { - parentLabel = parent.String - } - levelLabel := "0" - if level.Valid { - levelLabel = string(rune(level.Int64 + '0')) - } - typeLabel := "" - if contextType.Valid { - typeLabel = contextType.String - } - pathLabel := "" - if path.Valid { - pathLabel = path.String - } - - labels := []string{pidLabel, nameLabel, identLabel, 
parentLabel, levelLabel, typeLabel, pathLabel} - - if totalBytes.Valid { - ch <- prometheus.MustNewConstMetric( - backendMemoryContextsTotalBytes, - prometheus.GaugeValue, - totalBytes.Float64, - labels..., - ) - } - - if usedBytes.Valid { - ch <- prometheus.MustNewConstMetric( - backendMemoryContextsUsedBytes, - prometheus.GaugeValue, - usedBytes.Float64, - labels..., - ) - } - - if freeBytes.Valid { - ch <- prometheus.MustNewConstMetric( - backendMemoryContextsFreeBytes, - prometheus.GaugeValue, - freeBytes.Float64, - labels..., - ) - } - - if freeChunks.Valid { - ch <- prometheus.MustNewConstMetric( - backendMemoryContextsFreeChunks, - prometheus.GaugeValue, - float64(freeChunks.Int64), - labels..., - ) - } - } - - return nil -} diff --git a/collector/pg_backend_stats.go b/collector/pg_backend_stats.go deleted file mode 100644 index e28aa70ca..000000000 --- a/collector/pg_backend_stats.go +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright 2024 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package collector - -import ( - "context" - "database/sql" - - "github.com/blang/semver/v4" - "github.com/go-kit/log" - "github.com/prometheus/client_golang/prometheus" -) - -const backendStatsSubsystem = "backend_stats" - -func init() { - registerCollector(backendStatsSubsystem, defaultDisabled, NewPGBackendStatsCollector) -} - -type PGBackendStatsCollector struct { - log log.Logger -} - -func NewPGBackendStatsCollector(config collectorConfig) (Collector, error) { - return &PGBackendStatsCollector{log: config.logger}, nil -} - -var ( - // Backend I/O statistics metrics (PostgreSQL 18+) - backendIOReads = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendStatsSubsystem, "io_reads_total"), - "Number of reads by backend (PostgreSQL 18+)", - []string{"pid", "backend_type", "io_context", "io_object"}, - prometheus.Labels{}, - ) - backendIOWrites = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendStatsSubsystem, "io_writes_total"), - "Number of writes by backend (PostgreSQL 18+)", - []string{"pid", "backend_type", "io_context", "io_object"}, - prometheus.Labels{}, - ) - backendIOExtends = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendStatsSubsystem, "io_extends_total"), - "Number of extends by backend (PostgreSQL 18+)", - []string{"pid", "backend_type", "io_context", "io_object"}, - prometheus.Labels{}, - ) - backendIOReadBytes = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendStatsSubsystem, "io_read_bytes_total"), - "Number of bytes read by backend (PostgreSQL 18+)", - []string{"pid", "backend_type", "io_context", "io_object"}, - prometheus.Labels{}, - ) - backendIOWriteBytes = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendStatsSubsystem, "io_write_bytes_total"), - "Number of bytes written by backend (PostgreSQL 18+)", - []string{"pid", "backend_type", "io_context", "io_object"}, - prometheus.Labels{}, - ) - backendIOExtendBytes = prometheus.NewDesc( - prometheus.BuildFQName(namespace, 
backendStatsSubsystem, "io_extend_bytes_total"), - "Number of bytes extended by backend (PostgreSQL 18+)", - []string{"pid", "backend_type", "io_context", "io_object"}, - prometheus.Labels{}, - ) - - // Backend WAL statistics metrics (PostgreSQL 18+) - backendWALRecords = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendStatsSubsystem, "wal_records_total"), - "Number of WAL records generated by backend (PostgreSQL 18+)", - []string{"pid"}, - prometheus.Labels{}, - ) - backendWALBytes = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendStatsSubsystem, "wal_bytes_total"), - "Number of WAL bytes generated by backend (PostgreSQL 18+)", - []string{"pid"}, - prometheus.Labels{}, - ) - backendWALBuffersUsed = prometheus.NewDesc( - prometheus.BuildFQName(namespace, backendStatsSubsystem, "wal_buffers_used_total"), - "Number of WAL buffers used by backend (PostgreSQL 18+)", - []string{"pid"}, - prometheus.Labels{}, - ) - - // Backend I/O query for PostgreSQL 18+ - backendIOQuery = ` - SELECT - pid, - backend_type, - io_context, - io_object, - reads, - writes, - extends, - read_bytes, - write_bytes, - extend_bytes - FROM pg_stat_get_backend_io(NULL) - WHERE pid IS NOT NULL - ` - - // Backend WAL query for PostgreSQL 18+ - backendWALQuery = ` - SELECT - pid, - wal_records, - wal_bytes, - wal_buffers_used - FROM pg_stat_get_backend_wal(NULL) - WHERE pid IS NOT NULL - ` -) - -func (c *PGBackendStatsCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { - // Backend statistics functions were introduced in PostgreSQL 18 - if instance.version.LT(semver.Version{Major: 18}) { - return nil - } - - db := instance.getDB() - - // Collect backend I/O statistics - if err := c.collectBackendIO(ctx, db, ch); err != nil { - return err - } - - // Collect backend WAL statistics - if err := c.collectBackendWAL(ctx, db, ch); err != nil { - return err - } - - return nil -} - -func (c *PGBackendStatsCollector) collectBackendIO(ctx 
context.Context, db *sql.DB, ch chan<- prometheus.Metric) error { - rows, err := db.QueryContext(ctx, backendIOQuery) - if err != nil { - return err - } - defer rows.Close() - - for rows.Next() { - var pid sql.NullString - var backendType, ioContext, ioObject sql.NullString - var reads, writes, extends, readBytes, writeBytes, extendBytes sql.NullFloat64 - - err := rows.Scan( - &pid, - &backendType, - &ioContext, - &ioObject, - &reads, - &writes, - &extends, - &readBytes, - &writeBytes, - &extendBytes, - ) - if err != nil { - return err - } - - pidLabel := "unknown" - if pid.Valid { - pidLabel = pid.String - } - backendTypeLabel := "unknown" - if backendType.Valid { - backendTypeLabel = backendType.String - } - ioContextLabel := "unknown" - if ioContext.Valid { - ioContextLabel = ioContext.String - } - ioObjectLabel := "unknown" - if ioObject.Valid { - ioObjectLabel = ioObject.String - } - - labels := []string{pidLabel, backendTypeLabel, ioContextLabel, ioObjectLabel} - - if reads.Valid { - ch <- prometheus.MustNewConstMetric( - backendIOReads, - prometheus.CounterValue, - reads.Float64, - labels..., - ) - } - - if writes.Valid { - ch <- prometheus.MustNewConstMetric( - backendIOWrites, - prometheus.CounterValue, - writes.Float64, - labels..., - ) - } - - if extends.Valid { - ch <- prometheus.MustNewConstMetric( - backendIOExtends, - prometheus.CounterValue, - extends.Float64, - labels..., - ) - } - - if readBytes.Valid { - ch <- prometheus.MustNewConstMetric( - backendIOReadBytes, - prometheus.CounterValue, - readBytes.Float64, - labels..., - ) - } - - if writeBytes.Valid { - ch <- prometheus.MustNewConstMetric( - backendIOWriteBytes, - prometheus.CounterValue, - writeBytes.Float64, - labels..., - ) - } - - if extendBytes.Valid { - ch <- prometheus.MustNewConstMetric( - backendIOExtendBytes, - prometheus.CounterValue, - extendBytes.Float64, - labels..., - ) - } - } - - return nil -} - -func (c *PGBackendStatsCollector) collectBackendWAL(ctx context.Context, db 
*sql.DB, ch chan<- prometheus.Metric) error { - rows, err := db.QueryContext(ctx, backendWALQuery) - if err != nil { - return err - } - defer rows.Close() - - for rows.Next() { - var pid sql.NullString - var walRecords, walBytes, walBuffersUsed sql.NullFloat64 - - err := rows.Scan( - &pid, - &walRecords, - &walBytes, - &walBuffersUsed, - ) - if err != nil { - return err - } - - pidLabel := "unknown" - if pid.Valid { - pidLabel = pid.String - } - - labels := []string{pidLabel} - - if walRecords.Valid { - ch <- prometheus.MustNewConstMetric( - backendWALRecords, - prometheus.CounterValue, - walRecords.Float64, - labels..., - ) - } - - if walBytes.Valid { - ch <- prometheus.MustNewConstMetric( - backendWALBytes, - prometheus.CounterValue, - walBytes.Float64, - labels..., - ) - } - - if walBuffersUsed.Valid { - ch <- prometheus.MustNewConstMetric( - backendWALBuffersUsed, - prometheus.CounterValue, - walBuffersUsed.Float64, - labels..., - ) - } - } - - return nil -} From 74863ab9aa85247f4f97eb638228ed0338abc7e5 Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Thu, 9 Oct 2025 19:16:07 +0200 Subject: [PATCH 09/14] Format --- collector/pg_stat_io_test.go | 8 ++--- collector/pg_stat_user_tables.go | 54 ++++++++++++++++---------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/collector/pg_stat_io_test.go b/collector/pg_stat_io_test.go index 501f72829..973fd06e8 100644 --- a/collector/pg_stat_io_test.go +++ b/collector/pg_stat_io_test.go @@ -33,9 +33,9 @@ func TestPGStatIOCollector(t *testing.T) { inst := &instance{db: db, version: semver.MustParse("16.0.0")} - columns := []string{"backend_type", "io_object", "io_context", "reads", "read_bytes", "read_time", "writes", "write_bytes", "write_time", "writebacks", "writeback_time", "extends", "extend_bytes", "extend_time", "hits", "evictions", "reueses", "fsyncs", "fsync_time"} + columns := []string{"backend_type", "io_object", "io_context", "reads", "read_bytes", "read_time", "writes", "write_bytes", 
"write_time", "writebacks", "writeback_time", "extends", "extend_bytes", "extend_time", "hits", "evictions", "reueses", "fsyncs", "fsync_time"} rows := sqlmock.NewRows(columns). - AddRow("client backend", "relation", "normal", 100, nil, 50.5, 75 ,nil, 25.2, 10, 12.0, 7, nil, 11.0, 1 ,2 ,3, 4, 8.0 ) + AddRow("client backend", "relation", "normal", 100, nil, 50.5, 75, nil, 25.2, 10, 12.0, 7, nil, 11.0, 1, 2, 3, 4, 8.0) mock.ExpectQuery("SELECT.*backend_type.*FROM pg_stat_io").WillReturnRows(rows) ch := make(chan prometheus.Metric) @@ -48,7 +48,7 @@ func TestPGStatIOCollector(t *testing.T) { } }() - labels := labelMap{"backend_type": "client backend", "io_object": "relation", "io_context": "normal",} + labels := labelMap{"backend_type": "client backend", "io_object": "relation", "io_context": "normal"} expected := []MetricResult{ {labels: labels, metricType: dto.MetricType_COUNTER, value: 100}, {labels: labels, metricType: dto.MetricType_COUNTER, value: 50.5}, @@ -100,7 +100,7 @@ func TestPGStatIOCollectorPostgreSQL18(t *testing.T) { } }() - labels := labelMap{"backend_type": "client backend", "io_object": "relation", "io_context": "normal",} + labels := labelMap{"backend_type": "client backend", "io_object": "relation", "io_context": "normal"} expected := []MetricResult{ {labels: labels, metricType: dto.MetricType_COUNTER, value: 100}, {labels: labels, metricType: dto.MetricType_COUNTER, value: 50.5}, diff --git a/collector/pg_stat_user_tables.go b/collector/pg_stat_user_tables.go index 74e32b8d0..18b48593b 100644 --- a/collector/pg_stat_user_tables.go +++ b/collector/pg_stat_user_tables.go @@ -508,33 +508,33 @@ func (c *PGStatUserTablesCollector) Update(ctx context.Context, instance *instan if after18 { // PostgreSQL 18+ vacuum/analyze timing metrics - ch <- prometheus.MustNewConstMetric( - statUserTablesTotalVacuumTime, - prometheus.CounterValue, - totalVacuumTime.Float64, - datnameLabel, schemanameLabel, relnameLabel, - ) - - ch <- prometheus.MustNewConstMetric( - 
statUserTablesTotalAutovacuumTime, - prometheus.CounterValue, - totalAutovacuumTime.Float64, - datnameLabel, schemanameLabel, relnameLabel, - ) - - ch <- prometheus.MustNewConstMetric( - statUserTablesTotalAnalyzeTime, - prometheus.CounterValue, - totalAnalyzeTime.Float64, - datnameLabel, schemanameLabel, relnameLabel, - ) - - ch <- prometheus.MustNewConstMetric( - statUserTablesTotalAutoanalyzeTime, - prometheus.CounterValue, - totalAutoanalyzeTime.Float64, - datnameLabel, schemanameLabel, relnameLabel, - ) + ch <- prometheus.MustNewConstMetric( + statUserTablesTotalVacuumTime, + prometheus.CounterValue, + totalVacuumTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) + + ch <- prometheus.MustNewConstMetric( + statUserTablesTotalAutovacuumTime, + prometheus.CounterValue, + totalAutovacuumTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) + + ch <- prometheus.MustNewConstMetric( + statUserTablesTotalAnalyzeTime, + prometheus.CounterValue, + totalAnalyzeTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) + + ch <- prometheus.MustNewConstMetric( + statUserTablesTotalAutoanalyzeTime, + prometheus.CounterValue, + totalAutoanalyzeTime.Float64, + datnameLabel, schemanameLabel, relnameLabel, + ) } } From f66ec6be0401890bda0874da11209941d8622ab0 Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Fri, 10 Oct 2025 16:38:46 +0200 Subject: [PATCH 10/14] Fix linter warning --- collector/pg_stat_io_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collector/pg_stat_io_test.go b/collector/pg_stat_io_test.go index 973fd06e8..0071c902d 100644 --- a/collector/pg_stat_io_test.go +++ b/collector/pg_stat_io_test.go @@ -155,7 +155,7 @@ func TestPGStatIOCollectorPrePostgreSQL16(t *testing.T) { t.Errorf("There were unfulfilled expectations: %s", err) } - for _ = range ch { + for range ch { t.Error("Don't expect any metrics for PostgreSQL < 16") } } From abcfae5018b7d0af9ab8f1b3189a20cea838bedb Mon Sep 17 00:00:00 2001 From: 
Artem Gavrilov Date: Fri, 10 Oct 2025 17:36:36 +0200 Subject: [PATCH 11/14] Refactor custom queries --- queries-lr.yaml | 8 ++++---- queries-mr.yaml | 25 +++++++------------------ 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/queries-lr.yaml b/queries-lr.yaml index 5b2268a04..aa9cab872 100644 --- a/queries-lr.yaml +++ b/queries-lr.yaml @@ -106,16 +106,16 @@ pg_stat_user_tables: description: "Number of times this table has been analyzed by the autovacuum daemon" - total_vacuum_time: usage: "COUNTER" - description: "Time spent vacuuming this table, in milliseconds (PostgreSQL 18+)" + description: "Time spent vacuuming this table, in milliseconds" - total_autovacuum_time: usage: "COUNTER" - description: "Time spent auto-vacuuming this table, in milliseconds (PostgreSQL 18+)" + description: "Time spent auto-vacuuming this table, in milliseconds" - total_analyze_time: usage: "COUNTER" - description: "Time spent analyzing this table, in milliseconds (PostgreSQL 18+)" + description: "Time spent analyzing this table, in milliseconds" - total_autoanalyze_time: usage: "COUNTER" - description: "Time spent auto-analyzing this table, in milliseconds (PostgreSQL 18+)" + description: "Time spent auto-analyzing this table, in milliseconds" pg_statio_user_tables: query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables" diff --git a/queries-mr.yaml b/queries-mr.yaml index d950d1554..49254ce1a 100644 --- a/queries-mr.yaml +++ b/queries-mr.yaml @@ -16,44 +16,33 @@ pg_postmaster: description: "Time at which postmaster started" pg_database: - query: "SELECT pg_database.datname, pg_database_size(pg_database.datname) as size_bytes FROM pg_database" - master: true - cache_seconds: 30 - metrics: - - datname: - usage: "LABEL" - description: "Name of the database" - - size_bytes: - usage: "GAUGE" - description: "Disk 
space used by the database" - -# PostgreSQL 18+ enhanced database statistics -pg_stat_database_18: query: | SELECT - datname, + pg_database.datname, + pg_database_size(pg_database.datname), CASE WHEN current_setting('server_version_num')::int >= 180000 THEN COALESCE(parallel_workers_to_launch, 0) ELSE 0 END as parallel_workers_to_launch, CASE WHEN current_setting('server_version_num')::int >= 180000 THEN COALESCE(parallel_workers_launched, 0) ELSE 0 END as parallel_workers_launched - FROM - pg_stat_database - WHERE - datname IS NOT NULL + as size_bytes FROM pg_database master: true cache_seconds: 30 metrics: - datname: usage: "LABEL" description: "Name of the database" + - size_bytes: + usage: "GAUGE" + description: "Disk space used by the database" - parallel_workers_to_launch: usage: "COUNTER" description: "Number of parallel workers to launch (PostgreSQL 18+)" - parallel_workers_launched: usage: "COUNTER" description: "Number of parallel workers launched (PostgreSQL 18+)" + #### #pg_stat_statements: # query: "SELECT t2.rolname, t3.datname, queryid, calls, total_time / 1000 as total_time_seconds, min_time / 1000 as min_time_seconds, max_time / 1000 as max_time_seconds, mean_time / 1000 as mean_time_seconds, stddev_time / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 JOIN pg_roles t2 ON (t1.userid=t2.oid) JOIN pg_database t3 ON (t1.dbid=t3.oid) WHERE t2.rolname != 'rdsadmin'" From 14837966bdc3d849997441acf2623a82bd36c9c7 Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Fri, 10 Oct 2025 17:41:34 +0200 Subject: [PATCH 12/14] Update changelog --- CHANGELOG.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
e2e4aa846..d214f699f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,10 @@ ## [Unreleased] * [ENHANCEMENT] Add PostgreSQL 18 support: - * Add parallel worker activity metrics (`pg_stat_database_parallel_workers_to_launch`, `pg_stat_database_parallel_workers_launched`) - * Add vacuum/analyze timing metrics (`pg_stat_user_tables_total_vacuum_time`, `pg_stat_user_tables_total_autovacuum_time`, `pg_stat_user_tables_total_analyze_time`, `pg_stat_user_tables_total_autoanalyze_time`) - * Add enhanced checkpointer metrics (`pg_stat_bgwriter_checkpoints_done_total`, `pg_stat_bgwriter_slru_written_total`) + * Add parallel worker activity metrics + * Add vacuum/analyze timing metrics + * Add enhanced checkpointer metrics * Add `pg_stat_io` collector with byte statistics and WAL I/O activity tracking - * Add `pg_backend_stats` collector for per-backend I/O and WAL statistics - * Add enhanced `pg_backend_memory_contexts` collector with type and path columns * [ENHANCEMENT] Update CI tested PostgreSQL versions to include PostgreSQL 18 ## 0.15.0 / 2023-10-27 From 8ddf0d6d1735b42f672b9c021bfe8e347a698dc3 Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Fri, 10 Oct 2025 17:48:18 +0200 Subject: [PATCH 13/14] Update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b22e5f798..de712d68b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Prometheus exporter for PostgreSQL server metrics. 
-CI Tested PostgreSQL versions: `11`, `12`, `13`, `14`, `15`, `16`, `18` +CI Tested PostgreSQL versions: `13`, `14`, `15`, `16`, `18` ## Quick Start This package is available for Docker: From 53a399c620fffde3d057b2a5bec6fe3490cee6ec Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Fri, 10 Oct 2025 17:54:09 +0200 Subject: [PATCH 14/14] Revert --- queries-mr.yaml | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/queries-mr.yaml b/queries-mr.yaml index 49254ce1a..700e74b65 100644 --- a/queries-mr.yaml +++ b/queries-mr.yaml @@ -16,17 +16,7 @@ pg_postmaster: description: "Time at which postmaster started" pg_database: - query: | - SELECT - pg_database.datname, - pg_database_size(pg_database.datname), - CASE WHEN current_setting('server_version_num')::int >= 180000 - THEN COALESCE(parallel_workers_to_launch, 0) - ELSE 0 END as parallel_workers_to_launch, - CASE WHEN current_setting('server_version_num')::int >= 180000 - THEN COALESCE(parallel_workers_launched, 0) - ELSE 0 END as parallel_workers_launched - as size_bytes FROM pg_database + query: "SELECT pg_database.datname, pg_database_size(pg_database.datname) as size_bytes FROM pg_database" master: true cache_seconds: 30 metrics: @@ -36,13 +26,6 @@ pg_database: - size_bytes: usage: "GAUGE" description: "Disk space used by the database" - - parallel_workers_to_launch: - usage: "COUNTER" - description: "Number of parallel workers to launch (PostgreSQL 18+)" - - parallel_workers_launched: - usage: "COUNTER" - description: "Number of parallel workers launched (PostgreSQL 18+)" - #### #pg_stat_statements: # query: "SELECT t2.rolname, t3.datname, queryid, calls, total_time / 1000 as total_time_seconds, min_time / 1000 as min_time_seconds, max_time / 1000 as max_time_seconds, mean_time / 1000 as mean_time_seconds, stddev_time / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, 
local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 JOIN pg_roles t2 ON (t1.userid=t2.oid) JOIN pg_database t3 ON (t1.dbid=t3.oid) WHERE t2.rolname != 'rdsadmin'"