Skip to content

Commit

Permalink
MGMT-11309: add prometheus exporter for postgresql
Browse files Browse the repository at this point in the history
  • Loading branch information
rccrdpccl committed Aug 1, 2022
1 parent ed0ea92 commit 198fd05
Show file tree
Hide file tree
Showing 2 changed files with 345 additions and 1 deletion.
326 changes: 326 additions & 0 deletions openshift/template-monitoring-postgres.yaml
@@ -0,0 +1,326 @@
# OpenShift Template whose objects run a Prometheus postgres exporter:
# a ConfigMap with custom queries, a Service, and a Deployment.
apiVersion: template.openshift.io/v1
kind: Template
metadata:
  name: assisted-installer-monitoring-postgres
objects:
# Custom query definitions for the postgres exporter. The Deployment in this
# template mounts the `config.yaml` key at /etc/config.yaml and passes that
# path through --extend.query-path.
- apiVersion: v1
  kind: ConfigMap
  metadata:
    name: assisted-installer-prometheus-postgres-exporter-queries
    labels:
      app: assisted-installer-prometheus-postgres-exporter
  data:
    # NOTE(review): only `config.yaml` is mounted (via subPath), so nothing in
    # this template reads the key below; it looks like copy/paste residue.
    # Confirm no other consumer exists before dropping it.
    allow-snippet-annotations: "false"
    config.yaml: |
      # Each entry under `metrics` must be keyed by a column name returned by
      # the query; columns without a matching entry are exported as untyped
      # "unknown" metrics and lose their declared usage/description.
      assisted_installer_events:
        query: "SELECT MAX(id) as max_event_id FROM events"
        metrics:
          - max_event_id:
              usage: "COUNTER"
              description: "Numeric ID for events. Equivalent to a ever-increasing counter"
      pg_replication:
        query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag"
        master: true
        metrics:
          - lag:
              usage: "GAUGE"
              description: "Replication lag behind master in seconds"
      pg_postmaster:
        query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
        master: true
        metrics:
          - start_time_seconds:
              usage: "GAUGE"
              description: "Time at which postmaster started"
      pg_stat_user_tables:
        query: |
          SELECT
            current_database() datname,
            schemaname,
            relname,
            seq_scan,
            seq_tup_read,
            idx_scan,
            idx_tup_fetch,
            n_tup_ins,
            n_tup_upd,
            n_tup_del,
            n_tup_hot_upd,
            n_live_tup,
            n_dead_tup,
            n_mod_since_analyze,
            COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum,
            COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum,
            COALESCE(last_analyze, '1970-01-01Z') as last_analyze,
            COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze,
            vacuum_count,
            autovacuum_count,
            analyze_count,
            autoanalyze_count
          FROM
            pg_stat_user_tables
        metrics:
          - datname:
              usage: "LABEL"
              description: "Name of current database"
          - schemaname:
              usage: "LABEL"
              description: "Name of the schema that this table is in"
          - relname:
              usage: "LABEL"
              description: "Name of this table"
          - seq_scan:
              usage: "COUNTER"
              description: "Number of sequential scans initiated on this table"
          - seq_tup_read:
              usage: "COUNTER"
              description: "Number of live rows fetched by sequential scans"
          - idx_scan:
              usage: "COUNTER"
              description: "Number of index scans initiated on this table"
          - idx_tup_fetch:
              usage: "COUNTER"
              description: "Number of live rows fetched by index scans"
          - n_tup_ins:
              usage: "COUNTER"
              description: "Number of rows inserted"
          - n_tup_upd:
              usage: "COUNTER"
              description: "Number of rows updated"
          - n_tup_del:
              usage: "COUNTER"
              description: "Number of rows deleted"
          - n_tup_hot_upd:
              usage: "COUNTER"
              description: "Number of rows HOT updated (i.e., with no separate index update required)"
          - n_live_tup:
              usage: "GAUGE"
              description: "Estimated number of live rows"
          - n_dead_tup:
              usage: "GAUGE"
              description: "Estimated number of dead rows"
          - n_mod_since_analyze:
              usage: "GAUGE"
              description: "Estimated number of rows changed since last analyze"
          - last_vacuum:
              usage: "GAUGE"
              description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)"
          - last_autovacuum:
              usage: "GAUGE"
              description: "Last time at which this table was vacuumed by the autovacuum daemon"
          - last_analyze:
              usage: "GAUGE"
              description: "Last time at which this table was manually analyzed"
          - last_autoanalyze:
              usage: "GAUGE"
              description: "Last time at which this table was analyzed by the autovacuum daemon"
          - vacuum_count:
              usage: "COUNTER"
              description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)"
          - autovacuum_count:
              usage: "COUNTER"
              description: "Number of times this table has been vacuumed by the autovacuum daemon"
          - analyze_count:
              usage: "COUNTER"
              description: "Number of times this table has been manually analyzed"
          - autoanalyze_count:
              usage: "COUNTER"
              description: "Number of times this table has been analyzed by the autovacuum daemon"
      pg_statio_user_tables:
        query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables"
        metrics:
          - datname:
              usage: "LABEL"
              description: "Name of current database"
          - schemaname:
              usage: "LABEL"
              description: "Name of the schema that this table is in"
          - relname:
              usage: "LABEL"
              description: "Name of this table"
          - heap_blks_read:
              usage: "COUNTER"
              description: "Number of disk blocks read from this table"
          - heap_blks_hit:
              usage: "COUNTER"
              description: "Number of buffer hits in this table"
          - idx_blks_read:
              usage: "COUNTER"
              description: "Number of disk blocks read from all indexes on this table"
          - idx_blks_hit:
              usage: "COUNTER"
              description: "Number of buffer hits in all indexes on this table"
          - toast_blks_read:
              usage: "COUNTER"
              description: "Number of disk blocks read from this table's TOAST table (if any)"
          - toast_blks_hit:
              usage: "COUNTER"
              description: "Number of buffer hits in this table's TOAST table (if any)"
          - tidx_blks_read:
              usage: "COUNTER"
              description: "Number of disk blocks read from this table's TOAST table indexes (if any)"
          - tidx_blks_hit:
              usage: "COUNTER"
              description: "Number of buffer hits in this table's TOAST table indexes (if any)"
      pg_database:
        query: "SELECT pg_database.datname, pg_database_size(pg_database.datname) as size_bytes FROM pg_database"
        master: true
        cache_seconds: 30
        metrics:
          - datname:
              usage: "LABEL"
              description: "Name of the database"
          - size_bytes:
              usage: "GAUGE"
              description: "Disk space used by the database"
      # Histogram of how long idle sessions have been idle, per application.
      # Both CTEs restrict to state = 'idle' so that the per-bucket counts are
      # drawn from the same sessions as process_seconds_sum/_count.
      pg_stat_activity_idle:
        query: |
          WITH
            metrics AS (
              SELECT
                application_name,
                SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_seconds_sum,
                COUNT(*) AS process_seconds_count
              FROM pg_stat_activity
              WHERE state = 'idle'
              GROUP BY application_name
            ),
            buckets AS (
              SELECT
                application_name,
                le,
                SUM(
                  CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le
                    THEN 1
                    ELSE 0
                  END
                )::bigint AS bucket
              FROM
                pg_stat_activity,
                UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le
              WHERE state = 'idle'
              GROUP BY application_name, le
              ORDER BY application_name, le
            )
          SELECT
            application_name,
            process_seconds_sum,
            process_seconds_count,
            ARRAY_AGG(le) AS process_seconds,
            ARRAY_AGG(bucket) AS process_seconds_bucket
          FROM metrics JOIN buckets USING (application_name)
          GROUP BY 1, 2, 3
        metrics:
          - application_name:
              usage: "LABEL"
              description: "Application Name"
          - process_seconds:
              usage: "HISTOGRAM"
              description: "Idle time of server processes"
# ClusterIP Service exposing the exporter pods on TCP 9187 under the port
# name `http`.
- apiVersion: v1
  kind: Service
  metadata:
    name: assisted-installer-prometheus-postgres-exporter
    labels:
      app: assisted-installer-prometheus-postgres-exporter
  spec:
    type: ClusterIP
    # Routes to pods carrying the exporter's `app` label.
    selector:
      app: assisted-installer-prometheus-postgres-exporter
    ports:
    - name: http
      protocol: TCP
      port: 9187
      targetPort: 9187
# Single-replica Deployment of the postgres exporter. Connection settings
# come from the `assisted-installer-rds` Secret; the custom queries file is
# mounted from the queries ConfigMap at /etc/config.yaml.
- apiVersion: apps/v1
  kind: Deployment
  metadata:
    name: assisted-installer-prometheus-postgres-exporter
    labels:
      app: assisted-installer-prometheus-postgres-exporter
  spec:
    replicas: 1
    selector:
      matchLabels:
        app: assisted-installer-prometheus-postgres-exporter
    template:
      metadata:
        labels:
          app: assisted-installer-prometheus-postgres-exporter
      spec:
        containers:
        - name: prometheus-postgres-exporter
          image: '${PG_EXPORTER_IMAGE}'
          # The parameter name is spelled "EXPOTER"; it has to match the name
          # declared in this template's `parameters` list.
          imagePullPolicy: '${PG_EXPOTER_PULL_POLICY}'
          args:
          - '--extend.query-path=/etc/config.yaml'
          env:
          # DB_HOST is declared before DATA_SOURCE_URI because the URI value
          # references it as $(DB_HOST).
          - name: DB_HOST
            valueFrom:
              secretKeyRef:
                name: assisted-installer-rds
                key: db.host
          - name: DATA_SOURCE_URI
            value: '$(DB_HOST):5432/?sslmode=disable'
          - name: DATA_SOURCE_USER
            valueFrom:
              secretKeyRef:
                name: assisted-installer-rds
                key: db.user
          - name: DATA_SOURCE_PASS
            valueFrom:
              secretKeyRef:
                name: assisted-installer-rds
                key: db.password
          ports:
          - name: http
            protocol: TCP
            containerPort: 9187
          # Both probes issue an HTTP GET for "/" on the `http` port.
          livenessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 0
            timeoutSeconds: 1
          readinessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 0
            timeoutSeconds: 1
          resources:
            requests:
              cpu: '${PG_EXPORTER_CPU_REQUEST}'
              memory: '${PG_EXPORTER_MEMORY_REQUEST}'
            limits:
              cpu: '${PG_EXPORTER_CPU_LIMIT}'
              memory: '${PG_EXPORTER_MEMORY_LIMIT}'
          volumeMounts:
          # subPath mounts only the `config.yaml` key as a single file.
          - name: queries
            mountPath: /etc/config.yaml
            subPath: config.yaml
        volumes:
        - name: queries
          configMap:
            name: assisted-installer-prometheus-postgres-exporter-queries
            # 420 decimal == 0644 octal.
            defaultMode: 420
# Template parameters with their defaults; each is referenced as ${NAME}
# by the exporter Deployment.
parameters:
# Container resource requests.
- name: PG_EXPORTER_CPU_REQUEST
  value: '1m'
# Container resource limits.
- name: PG_EXPORTER_CPU_LIMIT
  value: '1'
- name: PG_EXPORTER_MEMORY_REQUEST
  value: '16Mi'
- name: PG_EXPORTER_MEMORY_LIMIT
  value: '64Mi'
# Exporter container image.
- name: PG_EXPORTER_IMAGE
  value: 'quay.io/prometheuscommunity/postgres-exporter:v0.10.1'
# Spelled "EXPOTER" (sic): the Deployment's imagePullPolicy references this
# exact name, so the two must be renamed together if ever corrected.
- name: PG_EXPOTER_PULL_POLICY
  value: IfNotPresent
20 changes: 19 additions & 1 deletion openshift/template-monitoring.yaml
Expand Up @@ -3,7 +3,7 @@ parameters:
- name: NAMESPACE
value: ''
required: true
apiVersion: v1
apiVersion: template.openshift.io/v1
kind: Template
metadata:
name: assisted-installer
Expand All @@ -26,3 +26,21 @@ objects:
selector:
matchLabels:
app: assisted-service
# ServiceMonitor that scrapes /metrics over plain HTTP every 30s from the
# `http` port of Services labelled as the postgres exporter, limited to
# the namespace given by the NAMESPACE template parameter.
- apiVersion: monitoring.coreos.com/v1
  kind: ServiceMonitor
  metadata:
    name: servicemonitor-assisted-installer-postgres
    labels:
      prometheus: app-sre
  spec:
    selector:
      matchLabels:
        app: assisted-installer-prometheus-postgres-exporter
    namespaceSelector:
      matchNames:
      - '${NAMESPACE}'
    endpoints:
    - port: http
      scheme: http
      path: /metrics
      interval: 30s

0 comments on commit 198fd05

Please sign in to comment.