From f0e8565b4908bf2e3f40ef7750cfc90812653a2b Mon Sep 17 00:00:00 2001 From: Javier Date: Fri, 19 May 2023 18:32:13 -0600 Subject: [PATCH] monitoring: update libsonnet files to generate ceph-cluster-advanced.json Add ceph-cluster.libsonnet file to generate ceph-cluster-advanced.json. Fixes: https://tracker.ceph.com/issues/61443 Signed-off-by: Javier --- monitoring/ceph-mixin/dashboards.libsonnet | 1 + .../dashboards/ceph-cluster.libsonnet | 1682 ++++++++ .../dashboards/timeseries_panel.libsonnet | 141 + .../ceph-mixin/dashboards/utils.libsonnet | 409 +- .../dashboards_out/ceph-cluster-advanced.json | 3792 +++++++++++++++++ .../features/ceph-cluster.feature | 562 ++- .../ceph-mixin/tests_dashboards/util.py | 12 +- 7 files changed, 6541 insertions(+), 58 deletions(-) create mode 100644 monitoring/ceph-mixin/dashboards/ceph-cluster.libsonnet create mode 100644 monitoring/ceph-mixin/dashboards/timeseries_panel.libsonnet create mode 100644 monitoring/ceph-mixin/dashboards_out/ceph-cluster-advanced.json diff --git a/monitoring/ceph-mixin/dashboards.libsonnet b/monitoring/ceph-mixin/dashboards.libsonnet index 5cae183294f98..4c67f21c039eb 100644 --- a/monitoring/ceph-mixin/dashboards.libsonnet +++ b/monitoring/ceph-mixin/dashboards.libsonnet @@ -6,5 +6,6 @@ (import 'dashboards/pool.libsonnet') + (import 'dashboards/rbd.libsonnet') + (import 'dashboards/rgw.libsonnet') + + (import 'dashboards/ceph-cluster.libsonnet') + { _config:: $._config }, } diff --git a/monitoring/ceph-mixin/dashboards/ceph-cluster.libsonnet b/monitoring/ceph-mixin/dashboards/ceph-cluster.libsonnet new file mode 100644 index 0000000000000..4e916d32427e9 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards/ceph-cluster.libsonnet @@ -0,0 +1,1682 @@ +local g = import 'grafonnet/grafana.libsonnet'; +local u = import 'utils.libsonnet'; + +(import 'utils.libsonnet') { + 'ceph-cluster-advanced.json': u.dashboardSchema( + 'Ceph Cluster - Advanced', + 'Ceph cluster overview', + '', + 'now-6h', + '1m', + 38, + 
$._config.dashboardTags, + '' + ).addAnnotation( + u.addAnnotationSchema( + 1, + '-- Grafana --', + true, // enable + true, // hide + 'rgba(0, 211, 255, 1)', + 'Annotations & Alerts', + 'dashboard' + ) + ).addRequired( + type='grafana', id='grafana', name='Grafana', version='5.3.2' + ).addRequired( + type='panel', id='graph', name='Graph', version='5.0.0' + ).addRequired( + type='panel', id='heatmap', name='Heatmap', version='5.0.0' + ).addRequired( + type='panel', id='singlestat', name='Singlestat', version='5.0.0' + ). + addTemplate(g.template.datasource('DS_PROMETHEUS', 'prometheus', 'Prometheus', label='Data Source')). + addTemplate( + u.addCustomTemplate( + name='interval', + query='5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d', + current='$__auto_interval_interval', + refresh=2, + label='Interval', + auto_count=10, + auto_min='1m', + options=[ + { selected: true, text: 'auto', value: '$__auto_interval_interval' }, + { selected: false, text: '5s', value: '5s' }, + { selected: false, text: '10s', value: '10s' }, + { selected: false, text: '30s', value: '30s' }, + { selected: false, text: '1m', value: '1m' }, + { selected: false, text: '10m', value: '10m' }, + { selected: false, text: '30m', value: '30m' }, + { selected: false, text: '1h', value: '1h' }, + { selected: false, text: '6h', value: '6h' }, + { selected: false, text: '12h', value: '12h' }, + { selected: false, text: '1d', value: '1d' }, + { selected: false, text: '7d', value: '7d' }, + { selected: false, text: '14d', value: '14d' }, + { selected: false, text: '30d', value: '30d' }, + ], + auto=true, + ) + ).addPanels( + [ + u.addRowSchema(collapse=false, showTitle=true, title='CLUSTER STATE') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } }, + u.addStatPanel( + title='Ceph health status', + unit='none', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 0, y: 1, w: 3, h: 3 }, + colorMode='value', + interval='1m', + transparent=true, + color={ mode: 'thresholds' }, + thresholdsMode='absolute', + 
pluginVersion='9.4.7' + ).addMappings([ + { + options: { + '0': { text: 'HEALTHY' }, + '1': { text: 'WARNING' }, + '2': { text: 'ERROR' }, + }, + type: 'value', + }, + { options: { match: null, result: { text: 'N/A' } }, type: 'special' }, + ]) + .addThresholds([ + { color: '#9ac48a' }, + { color: 'rgba(237, 129, 40, 0.89)', value: 1 }, + { color: 'rgba(245, 54, 54, 0.9)', value: 2 }, + ]) + .addTarget(u.addTargetSchema( + expr='ceph_health_status{}', + instant=true, + interval='$interval', + datasource='${DS_PROMETHEUS}', + step=300, + )), + + u.addGaugePanel( + title='Available Capacity', + gridPosition={ h: 6, w: 3, x: 3, y: 1 }, + unit='percentunit', + max=1, + min=0, + interval='1m', + pluginVersion='9.4.7' + ).addMappings([ + { options: { match: null, result: { text: 'N/A' } }, type: 'special' }, + ]) + .addThresholds([ + { color: 'rgba(245, 54, 54, 0.9)' }, + { color: 'rgba(237, 129, 40, 0.89)', value: 0.1 }, + { color: 'rgba(50, 172, 45, 0.97)', value: 0.3 }, + ]) + .addTarget(u.addTargetSchema( + expr='(ceph_cluster_total_bytes{}-ceph_cluster_total_used_bytes{})/ceph_cluster_total_bytes{}', + instant=true, + interval='$interval', + datasource='${DS_PROMETHEUS}', + step=300 + )), + + u.addStatPanel( + title='Cluster Capacity', + unit='decbytes', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 6, y: 1, w: 3, h: 3 }, + graphMode='area', + decimals=2, + interval='1m', + color={ fixedColor: 'rgb(31, 120, 193)', mode: 'fixed' }, + thresholdsMode='absolute', + pluginVersion='9.4.7', + ).addMappings([ + { options: { match: null, result: { text: 'N/A' } }, type: 'special' }, + ]).addThresholds([ + { color: 'rgba(50, 172, 45, 0.97)' }, + { color: 'rgba(237, 129, 40, 0.89)', value: 0.025 }, + { color: 'rgba(245, 54, 54, 0.9)', value: 1.0 }, + ]) + .addTarget(u.addTargetSchema( + expr='ceph_cluster_total_bytes{}', + instant=true, + interval='$interval', + datasource='${DS_PROMETHEUS}', + step=300 + )), + + u.addStatPanel( + title='Write Throughput', + unit='Bps', 
+ datasource='${DS_PROMETHEUS}', + gridPosition={ x: 9, y: 1, w: 3, h: 3 }, + decimals=1, + color={ mode: 'thresholds' }, + thresholdsMode='absolute', + pluginVersion='9.4.7', + ).addMappings([ + { options: { match: null, result: { text: 'N/A' } }, type: 'special' }, + ]).addThresholds([ + { color: 'green' }, + ]) + .addTarget(u.addTargetSchema( + expr='sum(irate(ceph_osd_op_w_in_bytes{}[5m]))', + instant=true, + interval='$interval', + datasource='${DS_PROMETHEUS}', + )), + + u.addStatPanel( + title='Read Throughput', + unit='Bps', + datasource='${DS_PROMETHEUS}', + gridPosition={ x: 12, y: 1, w: 3, h: 3 }, + decimals=1, + color={ mode: 'thresholds' }, + thresholdsMode='absolute', + pluginVersion='9.4.7', + ).addMappings([ + { options: { match: null, result: { text: 'N/A' } }, type: 'special' }, + ]).addThresholds([ + { color: '#d44a3a' }, + { color: 'rgba(237, 129, 40, 0.89)', value: 0 }, + { color: '#9ac48a', value: 0 }, + ]) + .addTarget(u.addTargetSchema( + expr='sum(irate(ceph_osd_op_r_out_bytes{}[5m]))', + instant=true, + interval='$interval', + datasource='${DS_PROMETHEUS}', + )), + + u.addStatPanel( + title='OSDs', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 3, w: 6, x: 15, y: 1 }, + color={ mode: 'thresholds' }, + thresholdsMode='absolute', + orientation='auto', + rootColorMode='Panel', + displayName='', + rootColors={ + crit: 'rgb(255, 0, 0)', + disable: 'rgba(128, 128, 128, 0.9)', + ok: 'rgba(50, 128, 45, 0.9)', + warn: 'rgba(237, 129, 40, 0.9)', + }, + cornerRadius=0, + flipCard=false, + flipTime=5, + isAutoScrollOnOverflow=false, + isGrayOnNoData=false, + isHideAlertsOnDisable=false, + isIgnoreOKColors=false, + fontFormat='Regular', + colorMode='background', + unit='none', + pluginVersion='9.4.7', + ) + .addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 80 }, + ]) + .addTargets([ + u.addTargetSchema( + aggregation='Last', + alias='All', + decimals=2, + displayAliasType='Always', + displayType='Regular', + 
displayValueWithAlias='When Alias Displayed', + units='none', + valueHandler='Number Threshold', + expr='count(ceph_osd_metadata)', + legendFormat='All', + interval='$interval', + datasource='${DS_PROMETHEUS}', + ), + u.addTargetSchema( + aggregation='Last', + alias='In', + decimals=2, + displayAliasType='Always', + displayType='Regular', + displayValueWithAlias='When Alias Displayed', + units='none', + valueHandler='Number Threshold', + expr='count(ceph_osd_in)', + legendFormat='In', + interval='$interval', + datasource='${DS_PROMETHEUS}', + ), + u.addTargetSchema( + aggregation='Last', + alias='Out', + decimals=2, + displayAliasType='Warning / Critical', + displayType='Regular', + displayValueWithAlias='When Alias Displayed', + units='none', + valueHandler='Number Threshold', + expr='sum(ceph_osd_in == bool 0)', + legendFormat='Out', + interval='', + warn=1, + datasource='${DS_PROMETHEUS}', + ), + u.addTargetSchema( + aggregation='Last', + alias='Up', + decimals=2, + displayAliasType='Always', + displayType='Regular', + displayValueWithAlias='When Alias Displayed', + units='none', + valueHandler='Number Threshold', + expr='sum(ceph_osd_up)', + legendFormat='Up', + interval='', + datasource='${DS_PROMETHEUS}', + ), + u.addTargetSchema( + aggregation='Last', + alias='Down', + decimals=2, + displayAliasType='Warning / Critical', + displayType='Regular', + displayValueWithAlias='When Alias Displayed', + units='none', + valueHandler='Number Threshold', + expr='sum(ceph_osd_up == bool 0)', + legendFormat='Down', + interval='', + warn=1, + datasource='${DS_PROMETHEUS}', + ), + ]), + + u.addStatPanel( + title='MGRs', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 6, w: 3, x: 21, y: 1 }, + color={ mode: 'thresholds' }, + thresholdsMode='absolute', + orientation='auto', + rootColorMode='Panel', + displayName='', + rootColors={ + crit: 'rgba(245, 54, 54, 0.9)', + disable: 'rgba(128, 128, 128, 0.9)', + ok: 'rgba(50, 128, 45, 0.9)', + warn: 'rgba(237, 129, 40, 0.9)', + 
}, + cornerRadius=1, + flipCard=false, + flipTime=5, + isAutoScrollOnOverflow=false, + isGrayOnNoData=false, + isHideAlertsOnDisable=false, + isIgnoreOKColors=false, + fontFormat='Regular', + colorMode='background', + unit='none', + pluginVersion='9.4.7', + ) + .addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 80 }, + ]) + .addTargets([ + u.addTargetSchema( + aggregation='Last', + alias='Active', + decimals=2, + displayAliasType='Always', + displayType='Regular', + displayValueWithAlias='When Alias Displayed', + units='none', + valueHandler='Number Threshold', + expr='count(ceph_mgr_status == 1) or vector(0)', + legendFormat='Active', + datasource='${DS_PROMETHEUS}', + instant=true, + ), + u.addTargetSchema( + aggregation='Last', + alias='Standby', + decimals=2, + displayAliasType='Always', + displayType='Regular', + displayValueWithAlias='When Alias Displayed', + units='none', + valueHandler='Number Threshold', + expr='count(ceph_mgr_status == 0) or vector(0)', + legendFormat='Standby', + datasource='${DS_PROMETHEUS}', + instant=true, + ), + ]), + + u.addStatPanel( + title='Firing Alerts', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 3, w: 3, x: 0, y: 4 }, + color={ mode: 'thresholds' }, + thresholdsMode='absolute', + orientation='auto', + rootColorMode='Panel', + displayName='', + rootColors={ + crit: 'rgba(245, 54, 54, 0.9)', + disable: 'rgba(128, 128, 128, 0.9)', + ok: 'rgba(50, 128, 45, 0.9)', + warn: 'rgba(237, 129, 40, 0.9)', + }, + cornerRadius=1, + flipCard=false, + flipTime=5, + isAutoScrollOnOverflow=false, + isGrayOnNoData=false, + isHideAlertsOnDisable=false, + isIgnoreOKColors=false, + fontFormat='Regular', + colorMode='background', + unit='none', + pluginVersion='9.4.7', + ) + .addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 1 }, + ]) + .addOverrides([ + { matcher: { id: 'byName', options: 'Critical' }, properties: [ + { id: 'color', value: { fixedColor: 'red', mode: 'fixed' } }, + ] }, + 
{ matcher: { id: 'byName', options: 'Warning' }, properties: [ + { id: 'color', value: { fixedColor: '#987d24', mode: 'fixed' } }, + ] }, + ]) + .addTargets([ + u.addTargetSchema( + aggregation='Last', + alias='Active', + decimals=2, + displayAliasType='Always', + displayType='Regular', + displayValueWithAlias='When Alias Displayed', + units='none', + valueHandler='Number Threshold', + expr='count(ALERTS{alertstate="firing",alertname=~"^Ceph.+", severity="critical"}) OR vector(0)', + legendFormat='Critical', + datasource='${DS_PROMETHEUS}', + instant=true, + ), + u.addTargetSchema( + aggregation='Last', + alias='Standby', + decimals=2, + displayAliasType='Always', + displayType='Regular', + displayValueWithAlias='When Alias Displayed', + units='none', + valueHandler='Number Threshold', + expr='count(ALERTS{alertstate="firing",alertname=~"^Ceph.+", severity="warning"}) OR vector(0)', + legendFormat='Warning', + datasource='${DS_PROMETHEUS}', + instant=true, + ), + ]), + + u.addStatPanel( + title='Used Capacity', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 3, w: 3, x: 6, y: 4 }, + color={ mode: 'thresholds' }, + thresholdsMode='absolute', + orientation='horizontal', + graphMode='area', + displayName='', + maxDataPoints=100, + colorMode='none', + unit='decbytes', + pluginVersion='9.4.7', + ) + .addMappings([ + { options: { result: { text: 'N/A' } }, type: 'special' }, + ]) + .addThresholds([ + { color: 'rgba(50, 172, 45, 0.97)', value: null }, + { color: 'rgba(237, 129, 40, 0.89)', value: 0.025 }, + { color: 'rgba(245, 54, 54, 0.9)', value: 0.1 }, + ]) + .addTargets([ + u.addTargetSchema( + expr='ceph_cluster_total_used_bytes{}', + legendFormat='', + datasource='${DS_PROMETHEUS}', + instant=true, + ), + ]), + + u.addStatPanel( + title='Write IOPS', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 3, w: 3, x: 9, y: 4 }, + color={ mode: 'thresholds' }, + thresholdsMode='absolute', + orientation='horizontal', + graphMode='area', + displayName='', + 
maxDataPoints=100, + colorMode='none', + unit='ops', + pluginVersion='9.4.7', + ) + .addMappings([ + { options: { result: { text: 'N/A' } }, type: 'special' }, + ]) + .addThresholds([ + { color: 'green', value: null }, + ]) + .addTargets([ + u.addTargetSchema( + expr='sum(irate(ceph_osd_op_w{}[1m]))', + legendFormat='', + datasource='${DS_PROMETHEUS}', + instant=true, + ), + ]), + + u.addStatPanel( + title='Read IOPS', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 3, w: 3, x: 12, y: 4 }, + color={ mode: 'thresholds' }, + thresholdsMode='absolute', + orientation='horizontal', + graphMode='area', + displayName='', + maxDataPoints=100, + colorMode='none', + unit='ops', + pluginVersion='9.4.7', + ) + .addMappings([ + { options: { result: { text: 'N/A' } }, type: 'special' }, + ]) + .addThresholds([ + { color: '#d44a3a', value: null }, + { color: 'rgba(237, 129, 40, 0.89)', value: 0 }, + { color: '#9ac48a', value: 0 }, + ]) + .addTargets([ + u.addTargetSchema( + expr='sum(irate(ceph_osd_op_r{}[1m]))', + legendFormat='', + datasource='${DS_PROMETHEUS}', + instant=true, + ), + ]), + + u.addStatPanel( + title='Monitors', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 3, w: 6, x: 15, y: 4 }, + color={ mode: 'thresholds' }, + thresholdsMode='absolute', + orientation='auto', + rootColorMode='Panel', + displayName='', + rootColors={ + crit: 'rgba(245, 54, 54, 0.9)', + disable: 'rgba(128, 128, 128, 0.9)', + ok: 'rgba(50, 128, 45, 0.9)', + warn: 'rgba(237, 129, 40, 0.9)', + }, + cornerRadius=1, + flipCard=false, + flipTime=5, + isAutoScrollOnOverflow=false, + isGrayOnNoData=false, + isHideAlertsOnDisable=false, + isIgnoreOKColors=false, + fontFormat='Regular', + colorMode='background', + unit='none', + pluginVersion='9.4.7', + ) + .addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 80 }, + ]) + .addTargets([ + u.addTargetSchema( + aggregation='Last', + alias='In Quorum', + decimals=2, + displayAliasType='Always', + displayType='Regular', + 
displayValueWithAlias='When Alias Displayed', + units='none', + valueHandler='Text Only', + expr='sum(ceph_mon_quorum_status)', + legendFormat='In Quorum', + datasource='${DS_PROMETHEUS}', + ), + u.addTargetSchema( + aggregation='Last', + alias='Total', + crit=1, + decimals=2, + displayAliasType='Always', + displayType='Regular', + displayValueWithAlias='When Alias Displayed', + units='none', + valueHandler='Text Only', + expr='count(ceph_mon_quorum_status)', + legendFormat='Total', + datasource='${DS_PROMETHEUS}', + warn=2, + ), + u.addTargetSchema( + aggregation='Last', + alias='MONs out of Quorum', + crit=1.6, + decimals=2, + displayAliasType='Warning / Critical', + displayType='Annotation', + displayValueWithAlias='Never', + units='none', + valueHandler='Number Threshold', + expr='count(ceph_mon_quorum_status) - sum(ceph_mon_quorum_status)', + legendFormat='MONs out of Quorum', + datasource='${DS_PROMETHEUS}', + warn=1.1, + range=true, + ), + ]), + u.addRowSchema(collapse=false, showTitle=true, title='CLUSTER STATS') + { gridPos: { x: 0, y: 7, w: 24, h: 1 } }, + u.addAlertListPanel( + title='Alerts', + datasource={ + type: 'datasource', + uid: 'grafana', + }, + gridPosition={ h: 8, w: 8, x: 0, y: 8 }, + alertInstanceLabelFilter='{alertname=~"^Ceph.+"}', + alertName='', + dashboardAlerts=false, + groupBy=[], + groupMode='default', + maxItems=20, + sortOrder=1, + stateFilter={ + 'error': true, + firing: true, + noData: false, + normal: false, + pending: true, + }, + ), + + u.timeSeriesPanel( + title='Capacity', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 8, w: 8, x: 8, y: 8 }, + fillOpacity=40, + pointSize=5, + showPoints='never', + unit='bytes', + displayMode='table', + tooltip={ mode: 'multi', sort: 'desc' }, + interval='$interval', + stackingMode='normal', + spanNulls=true, + decimals=2, + thresholdsMode='percentage', + sortBy='Last', + sortDesc=true, + ) + .addCalcs(['last']) + .addThresholds([ + { color: 'green', value: null }, + { color: '#c0921f', 
value: 75 }, + { color: '#E02F44', value: 85 }, + ]) + .addOverrides( + [ + { + matcher: { id: 'byName', options: 'Total Capacity' }, + properties: [{ + id: 'color', + value: { fixedColor: 'red', mode: 'fixed' }, + }], + }, + { + matcher: { id: 'byName', options: 'Used' }, + properties: [ + { + id: 'color', + value: { fixedColor: 'green', mode: 'fixed' }, + }, + { + id: 'custom.thresholdsStyle', + value: { mode: 'dashed' }, + }, + ], + }, + ] + ) + .addTargets( + [ + u.addTargetSchema( + expr='ceph_cluster_total_bytes{}', + datasource='${DS_PROMETHEUS}', + interval='$interval', + instant=false, + legendFormat='Total Capacity', + step=300, + range=true, + ), + u.addTargetSchema( + expr='ceph_cluster_total_used_bytes{}', + datasource='${DS_PROMETHEUS}', + interval='$interval', + instant=false, + legendFormat='Used', + step=300, + range=true, + ), + ] + ), + + u.timeSeriesPanel( + title='Cluster Throughput', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 8, w: 8, x: 16, y: 8 }, + fillOpacity=10, + pointSize=5, + lineWidth=1, + showPoints='never', + unit='decbytes', + displayMode='table', + tooltip={ mode: 'multi', sort: 'desc' }, + interval='$interval', + stackingMode='normal', + spanNulls=true, + decimals=null, + thresholdsMode='absolute', + ).addCalcs(['mean', 'lastNotNull', 'max', 'min']) + .addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 85 }, + ]) + .addTargets( + [ + u.addTargetSchema( + expr='sum(irate(ceph_osd_op_w_in_bytes{}[5m]))', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Write', + step=300, + range=true, + ), + u.addTargetSchema( + expr='sum(irate(ceph_osd_op_r_out_bytes{}[5m]))', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Read', + step=300, + range=true, + ), + ] + ), + + u.timeSeriesPanel( + title='IOPS', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 8, w: 8, x: 0, y: 16 }, + fillOpacity=10, + pointSize=5, + lineWidth=1, + showPoints='never', + 
unit='decbytes', + displayMode='table', + tooltip={ mode: 'multi', sort: 'desc' }, + interval='$interval', + stackingMode='normal', + spanNulls=true, + decimals=null, + thresholdsMode='absolute', + ) + .addCalcs(['mean', 'lastNotNull', 'max', 'min']) + .addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 80 }, + ]) + .addTargets( + [ + u.addTargetSchema( + expr='sum(irate(ceph_osd_op_w{}[1m]))', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Write', + step=300, + range=true, + ), + u.addTargetSchema( + expr='sum(irate(ceph_osd_op_r{}[1m]))', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Read', + step=300, + range=true, + ), + ] + ), + + u.timeSeriesPanel( + title='Pool Used Bytes', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 8, w: 8, x: 8, y: 16 }, + fillOpacity=10, + pointSize=5, + lineWidth=1, + showPoints='never', + unit='bytes', + tooltip={ mode: 'multi', sort: 'desc' }, + interval='$interval', + stackingMode='normal', + spanNulls=true, + decimals=null, + thresholdsMode='absolute', + displayMode='list', + placement='right', + ) + .addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 80 }, + ]) + .addTargets( + [ + u.addTargetSchema( + expr='(ceph_pool_bytes_used{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='{{name}}', + step=300, + ), + ] + ), + + u.timeSeriesPanel( + title='Pool Used RAW Bytes', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 8, w: 8, x: 16, y: 16 }, + fillOpacity=10, + pointSize=5, + lineWidth=1, + showPoints='never', + unit='bytes', + tooltip={ mode: 'multi', sort: 'desc' }, + interval='$interval', + stackingMode='normal', + spanNulls=true, + decimals=null, + thresholdsMode='absolute', + displayMode='table', + placement='right', + ) + .addThresholds([ + { color: 'green', value: null }, + ]) + .addOverrides( + [ + { + matcher: { id: 'byName', 
options: 'rbd Stored' }, + properties: [{ + id: 'color', + value: { fixedColor: 'transparent', mode: 'fixed' }, + }], + }, + ] + ) + .addTargets( + [ + u.addTargetSchema( + expr='(ceph_pool_stored_raw{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})', + datasource='${DS_PROMETHEUS}', + interval='', + legendFormat='{{name}}', + step=300, + range=true, + hide=false, + ), + ] + ), + + u.timeSeriesPanel( + title='Pool Objects Quota', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 7, w: 8, x: 0, y: 24 }, + fillOpacity=10, + pointSize=5, + lineWidth=1, + showPoints='never', + unit='short', + tooltip={ mode: 'multi', sort: 'none' }, + interval='$interval', + stackingMode='none', + spanNulls=true, + decimals=null, + thresholdsMode='absolute', + displayMode='list', + placement='bottom', + ) + .addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 80 }, + ]) + .addTargets( + [ + u.addTargetSchema( + expr='(ceph_pool_quota_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})', + datasource='${DS_PROMETHEUS}', + interval='', + legendFormat='{{name}}', + step=300, + ), + ] + ), + + u.timeSeriesPanel( + title='Pool Quota Bytes', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 7, w: 8, x: 8, y: 24 }, + fillOpacity=10, + pointSize=5, + lineWidth=1, + showPoints='never', + unit='bytes', + tooltip={ mode: 'multi', sort: 'none' }, + interval='$interval', + stackingMode='none', + spanNulls=true, + decimals=null, + thresholdsMode='absolute', + displayMode='list', + placement='bottom', + ) + .addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 80 }, + ]) + .addTargets( + [ + u.addTargetSchema( + expr='(ceph_pool_quota_bytes{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})', + datasource='${DS_PROMETHEUS}', + interval='', + legendFormat='{{name}}', + step=300, + ), + ] + ), + + u.timeSeriesPanel( + title='Objects Per Pool', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 7, w: 8, x: 16, y: 24 }, + 
fillOpacity=10, + pointSize=5, + lineWidth=1, + showPoints='never', + unit='short', + tooltip={ mode: 'multi', sort: 'none' }, + interval='$interval', + stackingMode='normal', + spanNulls=false, + decimals=null, + thresholdsMode='absolute', + displayMode='list', + placement='right', + ) + .addThresholds([ + { color: 'green', value: null }, + { color: 'red', value: 80 }, + ]) + .addTargets( + [ + u.addTargetSchema( + expr='(ceph_pool_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})', + datasource='${DS_PROMETHEUS}', + interval='', + legendFormat='{{name}}', + ), + ] + ), + + u.addRowSchema(collapse=false, showTitle=true, title='OBJECTS') + { gridPos: { x: 0, y: 31, w: 24, h: 1 } }, + + u.timeSeriesPanel( + title='OSD Type Count', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 12, w: 6, x: 0, y: 32 }, + fillOpacity=10, + pointSize=5, + lineWidth=2, + showPoints='never', + unit='short', + tooltip={ mode: 'multi', sort: 'asc' }, + interval='$interval', + stackingMode='normal', + spanNulls=true, + decimals=null, + thresholdsMode='absolute', + displayMode='list', + placement='bottom', + showLegend=false, + ) + .addThresholds([ + { color: 'green' }, + { color: 'red', value: 80 }, + ]) + .addOverrides( + [ + { + matcher: { id: 'byRegexp', options: '/^Total.*$/' }, + properties: [{ + id: 'custom.stacking', + value: { group: false, mode: 'normal' }, + }], + }, + ] + ) + .addTargets( + [ + u.addTargetSchema( + expr='sum(ceph_pool_objects)', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Total', + range=true, + step=200 + ), + ] + ), + + u.timeSeriesPanel( + title='PGs State', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 12, w: 8, x: 6, y: 32 }, + fillOpacity=10, + pointSize=5, + lineWidth=2, + showPoints='never', + unit='short', + tooltip={ mode: 'multi', sort: 'asc' }, + interval='$interval', + stackingMode='normal', + spanNulls=true, + decimals=null, + thresholdsMode='absolute', + displayMode='table', + 
placement='right', + showLegend=true, + ) + .addThresholds([ + { color: 'green' }, + { color: 'red', value: 80 }, + ]) + .addCalcs(['lastNotNull']) + .addOverrides( + [ + { + matcher: { id: 'byRegexp', options: '/^Total.*$/' }, + properties: [{ + id: 'custom.stacking', + value: { group: false, mode: 'normal' }, + }], + }, + ] + ) + .addTargets( + [ + u.addTargetSchema( + expr='sum(ceph_pg_active{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Active', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_clean{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Clean', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_peering{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Peering', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_degraded{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Degraded', + range=true, + step=300, + ), + u.addTargetSchema( + expr='sum(ceph_pg_stale{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Stale', + range=true, + step=300, + ), + u.addTargetSchema( + expr='sum(ceph_unclean_pgs{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Unclean', + range=true, + step=300, + ), + u.addTargetSchema( + expr='sum(ceph_pg_undersized{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Undersized', + range=true, + step=300, + ), + u.addTargetSchema( + expr='sum(ceph_pg_incomplete{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Incomplete', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_forced_backfill{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Forced Backfill', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_forced_recovery{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Forced Recovery', + range=true, + ), + 
u.addTargetSchema( + expr='sum(ceph_pg_creating{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Creating', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_wait_backfill{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Wait Backfill', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_deep{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Deep', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_scrubbing{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Scrubbing', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_recovering{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Recovering', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_repair{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Repair', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_down{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Down', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_peered{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Peered', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_backfill{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Backfill', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_remapped{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Remapped', + range=true, + ), + u.addTargetSchema( + expr='sum(ceph_pg_backfill_toofull{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Backfill Toofull', + range=true, + ), + ] + ), + + u.timeSeriesPanel( + title='Stuck PGs', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 6, w: 10, x: 14, y: 32 }, + fillOpacity=10, + pointSize=5, + lineWidth=2, + showPoints='never', + unit='short', + tooltip={ mode: 'multi', sort: 
'asc' }, + interval='$interval', + stackingMode='normal', + spanNulls=true, + decimals=null, + thresholdsMode='absolute', + displayMode='table', + placement='right', + showLegend=true, + ) + .addCalcs(['mean', 'lastNotNull']) + .addThresholds([ + { color: 'green' }, + { color: 'red', value: 80 }, + ]) + .addOverrides( + [ + { + matcher: { id: 'byRegexp', options: '/^Total.*$/' }, + properties: [{ + id: 'custom.stacking', + value: { group: false, mode: 'normal' }, + }], + }, + ] + ) + .addTargets([ + u.addTargetSchema( + expr='sum(ceph_pg_degraded{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Degraded', + range=true, + step=300, + ), + u.addTargetSchema( + expr='sum(ceph_pg_stale{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Stale', + range=true, + step=300, + ), + u.addTargetSchema( + expr='sum(ceph_pg_undersized{})', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='Undersized', + range=true, + step=300, + ), + ]), + + u.timeSeriesPanel( + title='Recovery Operations', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 6, w: 10, x: 14, y: 38 }, + fillOpacity=10, + pointSize=5, + lineWidth=2, + showPoints='never', + unit='short', + tooltip={ mode: 'multi', sort: 'none' }, + interval='$interval', + stackingMode='none', + spanNulls=true, + decimals=null, + thresholdsMode='absolute', + displayMode='list', + placement='bottom', + showLegend=false, + ) + .addThresholds([ + { color: 'green' }, + { color: 'red', value: 80 }, + ]) + .addTargets([ + u.addTargetSchema( + expr='sum(irate(ceph_osd_recovery_ops{}[$interval]))', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='OPS', + step=300, + ), + ]), + u.addRowSchema(false, true, 'LATENCY', collapsed=true) + .addPanels([ + u.heatMapPanel( + title='OSD Apply Latency Distribution', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 8, w: 12, x: 0, y: 42 }, + colorMode='opacity', + legendShow=true, + 
optionsCalculate=true, + optionsColor={ + exponent: 0.5, + fill: '#b4ff00', + mode: 'opacity', + reverse: false, + scale: 'exponential', + scheme: 'Oranges', + steps: 128, + }, + optionsExemplars={ color: 'rgba(255,0,255,0.7)' }, + optionsFilterValues={ le: 1e-9 }, + optionsLegend={ show: true }, + optionsRowFrame={ layout: 'auto' }, + optionsToolTip={ + show: true, + yHistogram: false, + }, + optionsYAxis={ + axisPlacement: 'left', + min: '0', + reverse: false, + unit: 'ms', + }, + xBucketSize='', + yAxisFormat='ms', + yAxisLogBase=2, + yAxisMin='0', + yBucketSize=10, + pluginVersion='9.4.7', + ).addTarget(u.addTargetSchema( + expr='ceph_osd_apply_latency_ms{}', + datasource='${DS_PROMETHEUS}', + interval='$interval', + instant=false, + )), + u.heatMapPanel( + title='OSD Commit Latency Distribution', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 8, w: 12, x: 12, y: 42 }, + colorMode='opacity', + legendShow=true, + cardColor='#65c5db', + optionsColor={ + exponent: 0.5, + fill: '#65c5db', + mode: 'opacity', + reverse: false, + scale: 'exponential', + scheme: 'Oranges', + steps: 128, + }, + optionsCalculate=true, + optionsCalculation={ + yBuckets: { + mode: 'count', + scale: { log: 2, type: 'log' }, + }, + }, + optionsExemplars={ color: 'rgba(255,0,255,0.7)' }, + optionsFilterValues={ le: 1e-9 }, + optionsLegend={ show: true }, + optionsRowFrame={ layout: 'auto' }, + optionsToolTip={ + show: true, + yHistogram: false, + }, + optionsYAxis={ + axisPlacement: 'left', + min: '0', + reverse: false, + unit: 'ms', + }, + xBucketSize='', + yAxisFormat='ms', + yAxisLogBase=2, + yAxisMin='0', + yBucketSize=10, + pluginVersion='9.4.7', + ).addTarget(u.addTargetSchema( + expr='ceph_osd_commit_latency_ms{}', + datasource='${DS_PROMETHEUS}', + interval='$interval', + instant=false, + )), + u.heatMapPanel( + title='OSD Read Op Latency Distribution', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 8, w: 12, x: 0, y: 50 }, + colorMode='opacity', + legendShow=true, + 
cardColor='#806eb7', + optionsColor={ + exponent: 0.5, + fill: '#806eb7', + mode: 'opacity', + reverse: false, + scale: 'exponential', + scheme: 'Oranges', + steps: 128, + }, + optionsCalculate=true, + optionsCalculation={ + yBuckets: { + mode: 'count', + scale: { log: 2, type: 'log' }, + }, + }, + optionsExemplars={ color: 'rgba(255,0,255,0.7)' }, + optionsFilterValues={ le: 1e-9 }, + optionsLegend={ show: true }, + optionsRowFrame={ layout: 'auto' }, + optionsToolTip={ + show: true, + yHistogram: false, + }, + optionsYAxis={ + axisPlacement: 'left', + decimals: 2, + min: '0', + reverse: false, + unit: 'ms', + }, + xBucketSize='', + yAxisFormat='ms', + yAxisLogBase=2, + yAxisMin='0', + yBucketSize=null, + pluginVersion='9.4.7', + ).addTarget(u.addTargetSchema( + expr='rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0', + datasource='${DS_PROMETHEUS}', + interval='$interval', + instant=false, + )), + + u.heatMapPanel( + title='OSD Write Op Latency Distribution', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 8, w: 12, x: 12, y: 50 }, + colorMode='opacity', + legendShow=true, + cardColor='#f9934e', + optionsColor={ + exponent: 0.5, + fill: '#f9934e', + mode: 'opacity', + reverse: false, + scale: 'exponential', + scheme: 'Oranges', + steps: 128, + }, + optionsCalculate=true, + optionsCalculation={ + yBuckets: { + mode: 'count', + scale: { log: 2, type: 'log' }, + }, + }, + optionsExemplars={ color: 'rgba(255,0,255,0.7)' }, + optionsFilterValues={ le: 1e-9 }, + optionsLegend={ show: true }, + optionsRowFrame={ layout: 'auto' }, + optionsToolTip={ + show: true, + yHistogram: false, + }, + optionsYAxis={ + axisPlacement: 'left', + decimals: 2, + min: '0', + reverse: false, + unit: 'ms', + }, + xBucketSize='', + yAxisFormat='ms', + yAxisLogBase=2, + yAxisMin='0', + yBucketSize=null, + pluginVersion='9.4.7', + ).addTarget(u.addTargetSchema( + expr='rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 
0', + datasource='${DS_PROMETHEUS}', + interval='$interval', + legendFormat='', + instant=false, + )), + u.timeSeriesPanel( + title='Recovery Operations', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 7, w: 12, x: 0, y: 58 }, + fillOpacity=10, + pointSize=5, + lineWidth=1, + showPoints='never', + unit='ms', + tooltip={ mode: 'multi', sort: 'none' }, + interval='$interval', + stackingMode='none', + spanNulls=false, + decimals=null, + thresholdsMode='absolute', + displayMode='table', + placement='bottom', + showLegend=true, + ) + .addThresholds([ + { color: 'green' }, + { color: 'red', value: 80 }, + ]) + .addTargets([ + u.addTargetSchema( + expr='avg(rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0)', + datasource='${DS_PROMETHEUS}', + legendFormat='Read', + ), + u.addTargetSchema( + expr='avg(rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0)', + datasource='${DS_PROMETHEUS}', + legendFormat='Write', + ), + ]), + + u.timeSeriesPanel( + title='AVG OSD Apply + Commit Latency', + datasource='${DS_PROMETHEUS}', + gridPosition={ h: 7, w: 12, x: 12, y: 58 }, + fillOpacity=10, + pointSize=5, + lineWidth=1, + showPoints='never', + unit='ms', + tooltip={ mode: 'multi', sort: 'none' }, + interval='$interval', + stackingMode='none', + spanNulls=false, + decimals=null, + thresholdsMode='absolute', + displayMode='table', + placement='bottom', + showLegend=true, + ) + .addCalcs(['lastNotNull', 'max']) + .addThresholds([ + { color: 'green' }, + { color: 'red', value: 80 }, + ]) + .addTargets([ + u.addTargetSchema( + expr='avg(ceph_osd_apply_latency_ms{})', + datasource='${DS_PROMETHEUS}', + legendFormat='apply', + interval='$interval', + metric='ceph_osd_perf_apply_latency_seconds', + step=4, + ), + u.addTargetSchema( + expr='avg(ceph_osd_commit_latency_ms{})', + datasource='${DS_PROMETHEUS}', + legendFormat='commit', + interval='$interval', + metric='ceph_osd_perf_commit_latency_seconds', + step=4, + 
), + ]), + ]) + + { gridPos: { x: 0, y: 44, w: 24, h: 1 } }, + u.addRowSchema(collapse=true, showTitle=true, title='', collapsed=false) + { gridPos: { x: 0, y: 45, w: 24, h: 1 } }, + + u.addTableExtended( + datasource='${DS_PROMETHEUS}', + title='Ceph Versions', + gridPosition={ h: 6, w: 24, x: 0, y: 46 }, + options={ + footer: { + fields: '', + reducer: ['sum'], + countRows: false, + enablePagination: false, + show: false, + }, + frameIndex: 1, + showHeader: true, + }, + custom={ align: 'left', cellOptions: { type: 'auto' }, filterable: false, inspect: false }, + thresholds={ + mode: 'absolute', + steps: [ + { color: 'green' }, + ], + }, + overrides=[{ + matcher: { id: 'byName', options: 'Time' }, + properties: [ + { id: 'custom.hidden', value: true }, + ], + }], + pluginVersion='9.4.7' + ) + .addTransformations([ + { + id: 'merge', + options: {}, + }, + { + id: 'organize', + options: { + excludeByName: {}, + indexByName: {}, + renameByName: { + Time: '', + 'Value #A': 'OSD Services', + 'Value #B': 'Mon Services', + 'Value #C': 'MDS Services', + 'Value #D': 'RGW Services', + 'Value #E': 'MGR Services', + ceph_version: 'Ceph Version', + }, + }, + }, + ]).addTargets([ + u.addTargetSchema( + expr='count by (ceph_version)(ceph_osd_metadata{})', + datasource='${DS_PROMETHEUS}', + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='OSD Services', + range=false, + ), + u.addTargetSchema( + expr='count by (ceph_version)(ceph_mon_metadata{})', + datasource='${DS_PROMETHEUS}', + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='Mon Services', + range=false, + ), + u.addTargetSchema( + expr='count by (ceph_version)(ceph_mds_metadata{})', + datasource='${DS_PROMETHEUS}', + format='table', + hide=false, + exemplar=false, + instant=true, + legendFormat='MDS Services', + range=false, + ), + u.addTargetSchema( + expr='count by (ceph_version)(ceph_rgw_metadata{})', + 
datasource='${DS_PROMETHEUS}', + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='RGW Services', + range=false, + ), + u.addTargetSchema( + expr='count by (ceph_version)(ceph_mgr_metadata{})', + datasource='${DS_PROMETHEUS}', + format='table', + hide=false, + exemplar=false, + instant=true, + interval='', + legendFormat='MGR Services', + range=false, + ), + ]), + + + ] //end panels + ), +} diff --git a/monitoring/ceph-mixin/dashboards/timeseries_panel.libsonnet b/monitoring/ceph-mixin/dashboards/timeseries_panel.libsonnet new file mode 100644 index 0000000000000..7da147cf56804 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards/timeseries_panel.libsonnet @@ -0,0 +1,141 @@ +{ + /** + * Creates a [Time series panel](https://grafana.com/docs/grafana/latest/panels-visualizations/visualizations/time-series/). + * + * @name timeseries_panel.new + * + * @param title (default `''`) Panel title. + * @param description (default null) Panel description. 
+ */ + new( + title='', + description=null, + pluginVersion='9.1.3', + gridPos={}, + datasource='', + colorMode='palette-classic', + axisCenteredZero=false, + axisColorMode='text', + axisLabel='', + axisPlacement='auto', + barAlignment=0, + drawStyle='line', + fillOpacity=0, + gradientMode='none', + lineInterpolation='linear', + lineWidth=0, + pointSize=0, + scaleDistributionType='linear', + showPoints='', + spanNulls=false, + stackingGroup='A', + stackingMode='none', + thresholdsStyleMode='off', + decimals=null, + thresholdsMode='absolute', + unit='none', + tooltip={}, + legend={}, + displayMode='list', + placement='bottom', + showLegend=true, + min=null, + scaleDistributionLog=null, + sortBy=null, + sortDesc=null, + ):: { + title: title, + type: 'timeseries', + [if description != null then 'description']: description, + pluginVersion: pluginVersion, + gridPos: gridPos, + datasource: datasource, + fieldConfig: { + defaults: { + color: { mode: colorMode }, + custom: { + axisCenteredZero: axisCenteredZero, + axisColorMode: axisColorMode, + axisLabel: axisLabel, + axisPlacement: axisPlacement, + barAlignment: barAlignment, + drawStyle: drawStyle, + fillOpacity: fillOpacity, + gradientMode: gradientMode, + hideFrom: { + legend: false, + tooltip: false, + viz: false, + }, + lineInterpolation: lineInterpolation, + lineWidth: lineWidth, + pointSize: pointSize, + scaleDistribution: { + [if scaleDistributionLog != null then 'scaleDistributionLog']: scaleDistributionLog, + type: scaleDistributionType, + }, + showPoints: showPoints, + spanNulls: spanNulls, + stacking: { + group: stackingGroup, + mode: stackingMode, + }, + thresholdsStyle: { + mode: thresholdsStyleMode, + }, + }, + [if decimals != null then 'decimals']: decimals, + [if min != null then 'min']: min, + thresholds: { + mode: thresholdsMode, + steps: [], + }, + unit: unit, + }, + overrides: [], + }, + options: { + legend: { + calcs: [], + displayMode: displayMode, + placement: placement, + showLegend: showLegend, 
+ [if sortBy != null then 'sortBy']: sortBy, + [if sortDesc != null then 'sortDesc']: sortDesc, + }, + tooltip: tooltip, + }, + // Overrides + addOverride( + matcher=null, + properties=null, + ):: self { + fieldConfig+: { + overrides+: [ + { + [if matcher != null then 'matcher']: matcher, + [if properties != null then 'properties']: properties, + }, + ], + }, + }, + // thresholds + addThreshold(step):: self { + fieldConfig+: { defaults+: { thresholds+: { steps+: [step] } } }, + }, + addCalc(calc):: self { + options+: { legend+: { calcs+: [calc] } }, + }, + _nextTarget:: 0, + addTarget(target):: self { + // automatically ref id in added targets. + local nextTarget = super._nextTarget, + _nextTarget: nextTarget + 1, + targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }], + }, + addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self), + addThresholds(steps):: std.foldl(function(p, s) p.addThreshold(s), steps, self), + addCalcs(calcs):: std.foldl(function(p, t) p.addCalc(t), calcs, self), + addOverrides(overrides):: std.foldl(function(p, o) p.addOverride(o.matcher, o.properties), overrides, self), + }, +} diff --git a/monitoring/ceph-mixin/dashboards/utils.libsonnet b/monitoring/ceph-mixin/dashboards/utils.libsonnet index a7774c7ceed82..257b4a44f6608 100644 --- a/monitoring/ceph-mixin/dashboards/utils.libsonnet +++ b/monitoring/ceph-mixin/dashboards/utils.libsonnet @@ -1,4 +1,5 @@ local g = import 'grafonnet/grafana.libsonnet'; +local timeSeries = import 'timeseries_panel.libsonnet'; { _config:: error 'must provide _config', @@ -58,12 +59,53 @@ local g = import 'grafonnet/grafana.libsonnet'; legend_values=legend_values), - addTargetSchema(expr, legendFormat='', format='time_series', intervalFactor=1, instant=null):: + addTargetSchema( + expr, + legendFormat='', + format='time_series', + intervalFactor=1, + instant=null, + datasource=null, + step=null, + interval=null, + range=null, + hide=null, + metric=null, + aggregation=null, 
+ alias=null, + decimals=null, + displayAliasType=null, + displayType=null, + displayValueWithAlias=null, + units=null, + valueHandler=null, + warn=null, + crit=null, + exemplar=null, + ):: g.prometheus.target(expr=expr, legendFormat=legendFormat, format=format, intervalFactor=intervalFactor, - instant=instant), + instant=instant, + datasource=datasource) + { + [if step != null then 'step']: step, + [if interval != null then 'interval']: interval, + [if range != null then 'range']: range, + [if hide != null then 'hide']: hide, + [if metric != null then 'metric']: metric, + [if aggregation != null then 'aggregation']: aggregation, + [if alias != null then 'alias']: alias, + [if decimals != null then 'decimals']: decimals, + [if displayAliasType != null then 'displayAliasType']: displayAliasType, + [if displayType != null then 'displayType']: displayType, + [if displayValueWithAlias != null then 'displayValueWithAlias']: displayValueWithAlias, + [if units != null then 'units']: units, + [if valueHandler != null then 'valueHandler']: valueHandler, + [if warn != null then 'warn']: warn, + [if crit != null then 'crit']: crit, + [if exemplar != null then 'exemplar']: exemplar, + }, addTemplateSchema(name, datasource, @@ -103,8 +145,15 @@ local g = import 'grafonnet/grafana.libsonnet'; name=name, type=type), - addRowSchema(collapse, showTitle, title):: - g.row.new(collapse=collapse, showTitle=showTitle, title=title), + addRowSchema( + collapse, + showTitle, + title, + collapsed=null + ):: + g.row.new(collapse=collapse, showTitle=showTitle, title=title) + { + [if collapsed != null then 'collapsed']: collapsed, + }, addSingleStatSchema(colors, datasource, @@ -176,7 +225,6 @@ local g = import 'grafonnet/grafana.libsonnet'; unit: unit, valueMaps: valueMaps, }, - matchers():: local jobMatcher = 'job=~"$job"'; local clusterMatcher = '%s=~"$cluster"' % $._config.clusterLabel; @@ -330,4 +378,355 @@ local g = import 'grafonnet/grafana.libsonnet'; 'pie', title, 'current'), + + 
addStatPanel( + title, + description='', + transparent=false, + datasource=null, + color={}, + unit='none', + overrides=[], + gridPosition={}, + colorMode='none', + graphMode='none', + justifyMode='auto', + orientation='horizontal', + textMode='auto', + reducerFunction='lastNotNull', + pluginVersion='9.1.3', + decimals=0, + interval=null, + maxDataPoints=null, + thresholdsMode='absolute', + rootColorMode=null, + rootColors=null, + cornerRadius=null, + flipCard=null, + flipTime=null, + fontFormat=null, + displayName=null, + isAutoScrollOnOverflow=null, + isGrayOnNoData=null, + isHideAlertsOnDisable=null, + isIgnoreOKColors=null, + ):: + g.statPanel.new( + title=title, + description=description, + transparent=transparent, + datasource=datasource, + unit=unit, + colorMode=colorMode, + graphMode=graphMode, + justifyMode=justifyMode, + orientation=orientation, + textMode=textMode, + reducerFunction=reducerFunction, + pluginVersion=pluginVersion, + decimals=decimals, + thresholdsMode=thresholdsMode, + ) + { + [if interval != null then 'interval']: interval, + [if maxDataPoints != null then 'maxDataPoints']: maxDataPoints, + [if gridPosition != {} then 'gridPos']: gridPosition, + [if rootColorMode != null then 'colorMode']: rootColorMode, + [if rootColors != {} then 'colors']: rootColors, + [if cornerRadius != null then 'cornerRadius']: cornerRadius, + [if flipCard != null then 'flipCard']: flipCard, + [if flipTime != null then 'flipTime']: flipTime, + [if fontFormat != null then 'fontFormat']: fontFormat, + [if displayName != null then 'displayName']: displayName, + [if isAutoScrollOnOverflow != null then 'isAutoScrollOnOverflow']: isAutoScrollOnOverflow, + [if isGrayOnNoData != null then 'isGrayOnNoData']: isGrayOnNoData, + [if isHideAlertsOnDisable != null then 'isHideAlertsOnDisable']: isHideAlertsOnDisable, + [if isIgnoreOKColors != null then 'isIgnoreOKColors']: isIgnoreOKColors, + }, + + addAlertListPanel( + title, + datasource=null, + gridPosition={}, + 
alertInstanceLabelFilter=null, + alertName=null, + dashboardAlerts=null, + groupBy=null, + groupMode=null, + maxItems=null, + sortOrder=null, + stateFilter=null, + viewMode='list' + ):: + g.alertlist.new( + title=title, + datasource=datasource, + ) + { + gridPos: gridPosition, + options: { + [if alertInstanceLabelFilter != null then 'alertInstanceLabelFilter']: alertInstanceLabelFilter, + [if alertName != null then 'alertName']: alertName, + [if dashboardAlerts != null then 'dashboardAlerts']: dashboardAlerts, + [if groupBy != null then 'groupBy']: groupBy, + [if groupMode != null then 'groupMode']: groupMode, + [if maxItems != null then 'maxItems']: maxItems, + [if sortOrder != null then 'sortOrder']: sortOrder, + [if stateFilter != null then 'stateFilter']: stateFilter, + viewMode: viewMode, + }, + }, + + addCustomTemplate(name='', + query='', + current='', + valuelabels={}, + refresh=0, + label='Interval', + auto_count=10, + auto_min='2m', + options=[], + auto=null):: + g.template.interval(name=name, + query=query, + current=current, + label=label, + auto_count=auto_count, + auto_min=auto_min,) + { + options: options, + refresh: refresh, + valuelabels: valuelabels, + [if auto != null then 'auto']: auto, + }, + + addGaugePanel(title='', + description='', + transparent=false, + datasource='${DS_PROMETHEUS}', + gridPosition={}, + pluginVersion='9.1.3', + unit='percentunit', + instant=false, + reducerFunction='lastNotNull', + steps=[], + max=1, + min=0, + maxDataPoints=100, + interval='1m'):: + g.gaugePanel.new(title=title, + description=description, + transparent=transparent, + datasource=datasource, + pluginVersion=pluginVersion, + unit=unit, + reducerFunction=reducerFunction, + max=max, + min=min) + { + gridPos: gridPosition, + maxDataPoints: maxDataPoints, + interval: interval, + }, + addTableExtended( + title='', + datasource=null, + description=null, + sort=null, + styles='', + transform=null, + pluginVersion='9.1.3', + options=null, + gridPosition={}, + 
custom=null, + decimals=null, + thresholds=null, + unit=null, + overrides=[], + color=null + ):: + g.tablePanel.new(datasource=datasource, + description=description, + sort=sort, + styles=styles, + title=title, + transform=transform) + { + pluginVersion: pluginVersion, + gridPos: gridPosition, + [if options != null then 'options']: options, + fieldConfig+: { + defaults+: { + [if custom != null then 'custom']: custom, + [if decimals != null then 'decimals']: decimals, + [if thresholds != null then 'thresholds']: thresholds, + [if unit != null then 'unit']: unit, + [if color != null then 'color']: color, + + }, + overrides: overrides, + }, + }, + timeSeriesPanel( + title='', + datasource=null, + gridPosition={}, + colorMode='palette-classic', + axisCenteredZero=false, + axisColorMode='text', + axisLabel='', + axisPlacement='auto', + barAlignment=0, + drawStyle='line', + fillOpacity=0, + gradientMode='none', + lineInterpolation='linear', + lineWidth=0, + pointSize=0, + scaleDistributionType='linear', + showPoints='', + spanNulls=false, + stackingGroup='A', + stackingMode='none', + thresholdsStyleMode='off', + decimals=null, + thresholdsMode='absolute', + unit='none', + tooltip={ mode: 'multi', sort: 'none' }, + pluginVersion='9.1.3', + displayMode='list', + placement='bottom', + showLegend=true, + interval=null, + min=null, + scaleDistributionLog=null, + sortBy=null, + sortDesc=null, + ):: + timeSeries.new( + title=title, + gridPos=gridPosition, + datasource=datasource, + colorMode=colorMode, + axisCenteredZero=axisCenteredZero, + axisColorMode=axisColorMode, + axisLabel=axisLabel, + axisPlacement=axisPlacement, + barAlignment=barAlignment, + drawStyle=drawStyle, + fillOpacity=fillOpacity, + gradientMode=gradientMode, + lineInterpolation=lineInterpolation, + lineWidth=lineWidth, + pointSize=pointSize, + scaleDistributionType=scaleDistributionType, + showPoints=showPoints, + spanNulls=spanNulls, + stackingGroup=stackingGroup, + stackingMode=stackingMode, + 
thresholdsStyleMode=thresholdsStyleMode, + decimals=decimals, + thresholdsMode=thresholdsMode, + unit=unit, + displayMode=displayMode, + placement=placement, + showLegend=showLegend, + tooltip=tooltip, + min=min, + scaleDistributionLog=scaleDistributionLog, + sortBy=sortBy, + sortDesc=sortDesc, + ) + { + pluginVersion: pluginVersion, + [if interval != null then 'interval']: interval, + }, + + heatMapPanel( + title='', + datasource=null, + gridPosition={}, + colorMode='spectrum', + cardColor='#b4ff00', + colorScale='sqrt', + colorScheme='interpolateOranges', + colorExponent=0.5, + pluginVersion='9.1.3', + dataFormat='timeseries', + hideFrom={ legend: false, tooltip: false, viz: false }, + scaleDistributionType='linear', + legendShow=false, + optionsCalculate=false, + optionsCalculation={ + yBuckets: { + mode: 'count', + scale: { log: 2, type: 'log' }, + value: '1', + }, + }, + optionsCellGap=2, + optionsCellValues={}, + optionsColor={}, + optionsExemplars={}, + optionsFilterValues={}, + optionsLegend={}, + optionsRowFrame={}, + optionsShowValue='never', + optionsToolTip={}, + optionsYAxis={}, + xBucketSize=null, + yAxisDecimals=null, + yAxisFormat='short', + yAxisLogBase=1, + yAxisMin=null, + yAxisMax=null, + yAxisShow=true, + yAxisSplitFactor=1, + yBucketSize=null, + yBucketBound='auto' + ) + :: g.heatmapPanel.new( + title=title, + datasource=datasource, + color_mode=colorMode, + color_cardColor=cardColor, + color_colorScale=colorScale, + color_colorScheme=colorScheme, + color_exponent=colorExponent, + legend_show=legendShow, + xBucketSize=xBucketSize, + yAxis_decimals=yAxisDecimals, + yAxis_format=yAxisFormat, + yAxis_logBase=yAxisLogBase, + yAxis_min=yAxisMin, + yAxis_max=yAxisMax, + yAxis_show=yAxisShow, + yAxis_splitFactor=yAxisSplitFactor, + yBucketSize=yBucketSize, + yBucketBound=yBucketBound + ) + { + gridPos: gridPosition, + pluginVersion: pluginVersion, + color+: { + colorScheme: colorScheme, + }, + fieldConfig: { + defaults: { + custom: { + hideFrom: 
hideFrom, + scaleDistribution: { + type: scaleDistributionType, + }, + }, + }, + }, + options: { + calculate: optionsCalculate, + calculation: optionsCalculation, + cellGap: optionsCellGap, + cellValues: optionsCellValues, + color: optionsColor, + exemplars: optionsExemplars, + filterValues: optionsFilterValues, + legend: optionsLegend, + rowsFrame: optionsRowFrame, + showValue: optionsShowValue, + tooltip: optionsToolTip, + yAxis: optionsYAxis, + }, + }, } diff --git a/monitoring/ceph-mixin/dashboards_out/ceph-cluster-advanced.json b/monitoring/ceph-mixin/dashboards_out/ceph-cluster-advanced.json new file mode 100644 index 0000000000000..216e02ed1a303 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/ceph-cluster-advanced.json @@ -0,0 +1,3792 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "heatmap", + "name": "Heatmap", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "Ceph cluster overview", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CLUSTER STATE", + "titleSize": "h6", + "type": "row" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + 
"defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "0": { + "text": "HEALTHY" + }, + "1": { + "text": "WARNING" + }, + "2": { + "text": "ERROR" + } + }, + "type": "value" + }, + { + "id": 1, + "options": { + "match": null, + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#9ac48a" + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 2 + } + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 3, + "interval": "1m", + "links": [ ], + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "ceph_health_status{}", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Ceph health status", + "transparent": true, + "type": "stat" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "match": null, + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)" + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.10000000000000001 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 0.29999999999999999 + } + ] + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 4, + "interval": "1m", + "links": [ ], + "maxDataPoints": 100, + "options": { + "reduceOptions": { + 
"calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "(ceph_cluster_total_bytes{}-ceph_cluster_total_used_bytes{})/ceph_cluster_total_bytes{}", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Available Capacity", + "transparent": false, + "type": "gauge" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 2, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "match": null, + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)" + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.025000000000000001 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 1 + } + ] + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 5, + "interval": "1m", + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "ceph_cluster_total_bytes{}", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Cluster Capacity", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 1, + "links": [ ], + "mappings": [ + { + "id": 0, + 
"options": { + "match": null, + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 6, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(irate(ceph_osd_op_w_in_bytes{}[5m]))", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Write Throughput", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 1, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "match": null, + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a" + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#9ac48a", + "value": 0 + } + ] + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 7, + "links": [ ], + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(irate(ceph_osd_op_r_out_bytes{}[5m]))", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + 
"title": "Read Throughput", + "transparent": false, + "type": "stat" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgb(255, 0, 0)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + } + }, + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 6, + "x": 15, + "y": 1 + }, + "id": 8, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [ ], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "aggregation": "Last", + "alias": "All", + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_osd_metadata)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "All", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "In", + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_osd_in)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "In", + "refId": "B", + "units": "none", + "valueHandler": 
"Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Out", + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_in == bool 0)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Out", + "refId": "C", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + }, + { + "aggregation": "Last", + "alias": "Up", + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_up)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Up", + "refId": "D", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Down", + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_up == bool 0)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Down", + "refId": "E", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "title": "OSDs", + "transparent": false, + "type": "stat" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 1, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + } + }, + "flipCard": false, + "flipTime": 5, + 
"fontFormat": "Regular", + "gridPos": { + "h": 6, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 9, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [ ], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "aggregation": "Last", + "alias": "Active", + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mgr_status == 1) or vector(0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Active", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Standby", + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mgr_status == 0) or vector(0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Standby", + "refId": "B", + "units": "none", + "valueHandler": "Number Threshold" + } + ], + "title": "MGRs", + "transparent": false, + "type": "stat" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 1, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + 
"unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Critical" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Warning" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#987d24", + "mode": "fixed" + } + } + ] + } + ] + }, + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 4 + }, + "id": 10, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [ ], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "aggregation": "Last", + "alias": "Active", + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"critical\"}) OR vector(0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Critical", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Standby", + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"warning\"}) OR vector(0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Warning", + "refId": "B", + "units": "none", + "valueHandler": "Number Threshold" + } + ], + "title": "Firing Alerts", + 
"transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.025000000000000001 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 0.10000000000000001 + } + ] + }, + "unit": "decbytes" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 4 + }, + "id": 11, + "links": [ ], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "ceph_cluster_total_used_bytes{}", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Used Capacity", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 4 + }, + "id": 12, + "links": [ ], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + 
"fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(irate(ceph_osd_op_w{}[1m]))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Write IOPS", + "transparent": false, + "type": "stat" + }, + { + "colors": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ + { + "id": 0, + "options": { + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#9ac48a", + "value": 0 + } + ] + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 4 + }, + "id": 13, + "links": [ ], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(irate(ceph_osd_op_r{}[1m]))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Read IOPS", + "transparent": false, + "type": "stat" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 1, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "displayName": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + } + }, + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 6, + "x": 15, + "y": 4 + }, + "id": 14, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [ ], + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "aggregation": "Last", + "alias": "In Quorum", + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_mon_quorum_status)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "In Quorum", + "refId": "A", + "units": "none", + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "Total", + "crit": 1, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mon_quorum_status)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "B", + "units": "none", + "valueHandler": "Text Only", + "warn": 2 + }, + { + "aggregation": "Last", + "alias": "MONs out of Quorum", + "crit": 1.6000000000000001, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Annotation", + "displayValueWithAlias": "Never", + "expr": "count(ceph_mon_quorum_status) - sum(ceph_mon_quorum_status)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MONs out of Quorum", + "range": true, + "refId": "C", + "units": "none", + "valueHandler": 
"Number Threshold", + "warn": 1.1000000000000001 + } + ], + "title": "Monitors", + "transparent": false, + "type": "stat" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 15, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CLUSTER STATS", + "titleSize": "h6", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 16, + "limit": 10, + "onlyAlertsOnDashboard": true, + "options": { + "alertInstanceLabelFilter": "{alertname=~\"^Ceph.+\"}", + "alertName": "", + "dashboardAlerts": false, + "groupBy": [ ], + "groupMode": "default", + "maxItems": 20, + "sortOrder": 1, + "stateFilter": { + "error": true, + "firing": true, + "noData": false, + "normal": false, + "pending": true + }, + "viewMode": "list" + }, + "show": "current", + "sortOrder": 1, + "stateFilter": [ ], + "title": "Alerts", + "type": "alertlist" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#c0921f", + "value": 75 + }, + { + "color": "#E02F44", + "value": 85 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + 
"matcher": { + "id": "byName", + "options": "Total Capacity" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Used" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + }, + { + "id": "custom.thresholdsStyle", + "value": { + "mode": "dashed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 8 + }, + "id": 17, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "ceph_cluster_total_bytes{}", + "format": "time_series", + "instant": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Total Capacity", + "range": true, + "refId": "A", + "step": 300 + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "ceph_cluster_total_used_bytes{}", + "format": "time_series", + "instant": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Used", + "range": true, + "refId": "B", + "step": 300 + } + ], + "title": "Capacity", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": 
"normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 18, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(irate(ceph_osd_op_w_in_bytes{}[5m]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Write", + "range": true, + "refId": "A", + "step": 300 + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(irate(ceph_osd_op_r_out_bytes{}[5m]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Read", + "range": true, + "refId": "B", + "step": 300 + } + ], + "title": "Cluster Throughput", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 
+ } + ] + }, + "unit": "decbytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 19, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(irate(ceph_osd_op_w{}[1m]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Write", + "range": true, + "refId": "A", + "step": 300 + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(irate(ceph_osd_op_r{}[1m]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Read", + "range": true, + "refId": "B", + "step": 300 + } + ], + "title": "IOPS", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 20, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", 
+ "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "(ceph_pool_bytes_used{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A", + "step": 300 + } + ], + "title": "Pool Used Bytes", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "rbd Stored" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "transparent", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 21, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "(ceph_pool_stored_raw{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "format": "time_series", + "hide": false, + 
"interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "range": true, + "refId": "A", + "step": 300 + } + ], + "title": "Pool Used RAW Bytes", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 24 + }, + "id": 22, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "(ceph_pool_quota_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A", + "step": 300 + } + ], + "title": "Pool Objects Quota", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 24 + }, + "id": 23, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "(ceph_pool_quota_bytes{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A", + "step": 300 + } + ], + "title": "Pool Quota Bytes", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + 
}, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 24 + }, + "id": 24, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "(ceph_pool_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "title": "Objects Per Pool", + "type": "timeseries" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 25, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OBJECTS", + "titleSize": "h6", + "type": "row" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^Total.*$/" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + 
"group": false, + "mode": "normal" + } + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 6, + "x": 0, + "y": 32 + }, + "id": 26, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pool_objects)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Total", + "range": true, + "refId": "A", + "step": 200 + } + ], + "title": "OSD Type Count", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^Total.*$/" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 6, + "y": 32 + }, + "id": 27, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": 
"asc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_active{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Active", + "range": true, + "refId": "A" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_clean{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Clean", + "range": true, + "refId": "B" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_peering{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Peering", + "range": true, + "refId": "C" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_degraded{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Degraded", + "range": true, + "refId": "D", + "step": 300 + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_stale{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Stale", + "range": true, + "refId": "E", + "step": 300 + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_unclean_pgs{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Unclean", + "range": true, + "refId": "F", + "step": 300 + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_undersized{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Undersized", + "range": true, + "refId": "G", + "step": 300 + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_incomplete{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Incomplete", + "range": true, + "refId": "H" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_forced_backfill{})", + "format": "time_series", + "interval": 
"$interval", + "intervalFactor": 1, + "legendFormat": "Forced Backfill", + "range": true, + "refId": "I" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_forced_recovery{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Forced Recovery", + "range": true, + "refId": "J" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_creating{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Creating", + "range": true, + "refId": "K" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_wait_backfill{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Wait Backfill", + "range": true, + "refId": "L" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_deep{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Deep", + "range": true, + "refId": "M" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_scrubbing{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Scrubbing", + "range": true, + "refId": "N" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_recovering{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Recovering", + "range": true, + "refId": "O" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_repair{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Repair", + "range": true, + "refId": "P" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_down{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Down", + "range": true, + "refId": "Q" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_peered{})", + "format": "time_series", + 
"interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Peered", + "range": true, + "refId": "R" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_backfill{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Backfill", + "range": true, + "refId": "S" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_remapped{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Remapped", + "range": true, + "refId": "T" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_backfill_toofull{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Backfill Toofull", + "range": true, + "refId": "U" + } + ], + "title": "PGs State", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^Total.*$/" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 10, + "x": 14, + "y": 32 + }, + "id": 28, + "interval": "$interval", + "options": { 
+ "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_degraded{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Degraded", + "range": true, + "refId": "A", + "step": 300 + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_stale{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Stale", + "range": true, + "refId": "B", + "step": 300 + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(ceph_pg_undersized{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Undersized", + "range": true, + "refId": "C", + "step": 300 + } + ], + "title": "Stuck PGs", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 10, + "x": 14, + "y": 38 + }, + "id": 29, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "sum(irate(ceph_osd_recovery_ops{}[$interval]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "OPS", + "refId": "A", + "step": 300 + } + ], + "title": "Recovery Operations", + "type": "timeseries" + }, + { + "collapse": false, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 30, + "panels": [ + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "opacity" + }, + "dataFormat": "timeseries", + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "heatmap": { }, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 31, + "legend": { + "show": true + }, + "options": { + "calculate": true, + "calculation": { + "yBuckets": { + "mode": "count", + "scale": { + "log": 2, + "type": "log" + }, + "value": "1" + } + }, + "cellGap": 2, + "cellValues": { }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "opacity", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1.0000000000000001e-09 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "min": "0", + "reverse": false, + "unit": "ms" + } + }, + "pluginVersion": 
"9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "ceph_osd_apply_latency_ms{}", + "format": "time_series", + "instant": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "OSD Apply Latency Distribution", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "ms", + "logBase": 2, + "max": null, + "min": "0", + "show": true, + "splitFactor": 1 + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": 10 + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#65c5db", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "opacity" + }, + "dataFormat": "timeseries", + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "heatmap": { }, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 32, + "legend": { + "show": true + }, + "options": { + "calculate": true, + "calculation": { + "yBuckets": { + "mode": "count", + "scale": { + "log": 2, + "type": "log" + } + } + }, + "cellGap": 2, + "cellValues": { }, + "color": { + "exponent": 0.5, + "fill": "#65c5db", + "mode": "opacity", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1.0000000000000001e-09 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "min": "0", + "reverse": false, + 
"unit": "ms" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "ceph_osd_commit_latency_ms{}", + "format": "time_series", + "instant": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "OSD Commit Latency Distribution", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "ms", + "logBase": 2, + "max": null, + "min": "0", + "show": true, + "splitFactor": 1 + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": 10 + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#806eb7", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "opacity" + }, + "dataFormat": "timeseries", + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 50 + }, + "heatmap": { }, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 33, + "legend": { + "show": true + }, + "options": { + "calculate": true, + "calculation": { + "yBuckets": { + "mode": "count", + "scale": { + "log": 2, + "type": "log" + } + } + }, + "cellGap": 2, + "cellValues": { }, + "color": { + "exponent": 0.5, + "fill": "#806eb7", + "mode": "opacity", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1.0000000000000001e-09 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": 
"left", + "decimals": 2, + "min": "0", + "reverse": false, + "unit": "ms" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0", + "format": "time_series", + "instant": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "OSD Read Op Latency Distribution", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "ms", + "logBase": 2, + "max": null, + "min": "0", + "show": true, + "splitFactor": 1 + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#f9934e", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "opacity" + }, + "dataFormat": "timeseries", + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 50 + }, + "heatmap": { }, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 34, + "legend": { + "show": true + }, + "options": { + "calculate": true, + "calculation": { + "yBuckets": { + "mode": "count", + "scale": { + "log": 2, + "type": "log" + } + } + }, + "cellGap": 2, + "cellValues": { }, + "color": { + "exponent": 0.5, + "fill": "#f9934e", + "mode": "opacity", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1.0000000000000001e-09 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + 
}, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 2, + "min": "0", + "reverse": false, + "unit": "ms" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0", + "format": "time_series", + "instant": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "OSD Write Op Latency Distribution", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "ms", + "logBase": 2, + "max": null, + "min": "0", + "show": true, + "splitFactor": 1 + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 58 + }, + "id": 35, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ ], + "displayMode": "table", + "placement": "bottom", + 
"showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "avg(rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read", + "refId": "A" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "avg(rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write", + "refId": "B" + } + ], + "title": "Recovery Operations", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 58 + }, + "id": 36, + "interval": "$interval", + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "expr": "avg(ceph_osd_apply_latency_ms{})", + "format": "time_series", + "interval": 
"$interval", + "intervalFactor": 1, + "legendFormat": "apply", + "metric": "ceph_osd_perf_apply_latency_seconds", + "refId": "A", + "step": 4 + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "avg(ceph_osd_commit_latency_ms{})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "commit", + "metric": "ceph_osd_perf_commit_latency_seconds", + "refId": "B", + "step": 4 + } + ], + "title": "AVG OSD Apply + Commit Latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "LATENCY", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": true, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 37, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6", + "type": "row" + }, + { + "columns": [ ], + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 38, + "links": [ ], + "options": { + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true + }, + "pluginVersion": "9.4.7", + "styles": "", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": false, + "expr": "count by (ceph_version)(ceph_osd_metadata{})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "OSD Services", 
+ "range": false, + "refId": "A" + }, + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": false, + "expr": "count by (ceph_version)(ceph_mon_metadata{})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Mon Services", + "range": false, + "refId": "B" + }, + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": false, + "expr": "count by (ceph_version)(ceph_mds_metadata{})", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "MDS Services", + "range": false, + "refId": "C" + }, + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": false, + "expr": "count by (ceph_version)(ceph_rgw_metadata{})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "RGW Services", + "range": false, + "refId": "D" + }, + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": false, + "expr": "count by (ceph_version)(ceph_mgr_metadata{})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "MGR Services", + "range": false, + "refId": "E" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Ceph Versions", + "transformations": [ + { + "id": "merge", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { }, + "indexByName": { }, + "renameByName": { + "Time": "", + "Value #A": "OSD Services", + "Value #B": "Mon Services", + "Value #C": "MDS Services", + "Value #D": "RGW Services", + "Value #E": "MGR Services", + "ceph_version": "Ceph Version" + } + } + } + ], + "type": "table" + } + ], + "refresh": "1m", + "rows": [ ], + "schemaVersion": 38, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "DS_PROMETHEUS", + "options": [ ], + "query": "prometheus", + "refresh": 1, + 
"regex": "", + "type": "datasource" + }, + { + "auto": true, + "auto_count": 10, + "auto_min": "1m", + "current": { + "text": "$__auto_interval_interval", + "value": "$__auto_interval_interval" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "5s", + "value": "5s" + }, + { + "selected": false, + "text": "10s", + "value": "10s" + }, + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval", + "valuelabels": { } + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph Cluster - Advanced", + "version": 0 +} diff --git a/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature b/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature index 1a446cd2c535c..c00107ec7ae30 100644 --- a/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature +++ 
b/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature @@ -1,54 +1,526 @@ Feature: Ceph Cluster Dashboard -Scenario: "Test total PG States" + Scenario: "Test cluster health" Given the following series: | metrics | values | - | ceph_pg_total{foo="var"} | 10 100 | - | ceph_pg_total{foo="bar"} | 20 200 | - Then Grafana panel `PG States` with legend `Total` shows: + | ceph_health_status{} | 1.0 | + Then Grafana panel `Health Status` with legend `EMPTY` shows: | metrics | values | - | {} | 300 | + | ceph_health_status{} | 1.0 | -Scenario: "Test OSDs in" - Given the following series: - | metrics | values | - | ceph_osd_in{ceph_daemon="osd.0"} | 1.0 | - | ceph_osd_in{ceph_daemon="osd.1"} | 0.0 | - | ceph_osd_in{ceph_daemon="osd.2"} | 1.0 | - When variable `instance` is `.*` - Then Grafana panel `OSDs` with legend `In` shows: - | metrics | values | - | {} | 2 | + Scenario: "Test Firing Alerts Warning" + Given the following series: + | metrics | values | + | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning"} | 1 | + | ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical"} | 1 | + Then Grafana panel `Firing Alerts` with legend `Warning` shows: + | metrics | values | + | {} | 1 | -Scenario: "Test OSDs down" - Given the following series: - | metrics | values | - | ceph_osd_up{ceph_daemon="osd.0", instance="127.0.0.1"} | 0.0 | - | ceph_osd_up{ceph_daemon="osd.1", instance="127.0.0.1"} | 0.0 | - | ceph_osd_up{ceph_daemon="osd.2", instance="127.0.0.1"} | 0.0 | - When variable `instance` is `127.0.0.1` - Then Grafana panel `OSDs` with legend `Down` shows: - | metrics | values | - | {} | 3 | + Scenario: "Test Firing Alerts Critical" + Given the following series: + | metrics | values | + | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning"} | 1 | + | ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical"} | 1 | + Then Grafana panel `Firing Alerts` with legend `Critical` shows: + | metrics | values | + | {} | 1 | 
-Scenario: "Test OSDs out" - Given the following series: - | metrics | values | - | ceph_osd_in{ceph_daemon="osd.0", instance="127.0.0.1"} | 0.0 | - | ceph_osd_in{ceph_daemon="osd.1", instance="127.0.0.1"} | 1.0 | - | ceph_osd_in{ceph_daemon="osd.2", instance="127.0.0.1"} | 0.0 | - When variable `instance` is `127.0.0.1` - Then Grafana panel `OSDs` with legend `Out` shows: - | metrics | values | - | {} | 2 | + Scenario: "Test Available Capacity" + Given the following series: + | metrics | values | + | ceph_cluster_total_bytes{}| 100 | + | ceph_cluster_total_used_bytes{}| 70 | + Then Grafana panel `Available Capacity` with legend `EMPTY` shows: + | metrics | values | + | {} | 0.3 | -Scenario: "Test OSDs all" - Given the following series: - | metrics | values | - | ceph_osd_metadata{ceph_daemon="osd.0", instance="127.0.0.1"} | 1.0 | - | ceph_osd_metadata{ceph_daemon="osd.1", instance="127.0.0.1"} | 1.0 | - | ceph_osd_metadata{ceph_daemon="osd.2", instance="127.0.0.1"} | 1.0 | - When variable `instance` is `127.0.0.1` - Then Grafana panel `OSDs` with legend `All` shows: - | metrics | values | - | {} | 3 | + Scenario: "Test Cluster Capacity" + Given the following series: + | metrics | values | + | ceph_cluster_total_bytes{}| 100 | + Then Grafana panel `Cluster Capacity` with legend `EMPTY` shows: + | metrics | values | + | ceph_cluster_total_bytes{} | 100 | + + Scenario: "Test Used Capacity" + Given the following series: + | metrics | values | + | ceph_cluster_total_used_bytes{}| 100 | + Then Grafana panel `Used Capacity` with legend `EMPTY` shows: + | metrics | values | + | ceph_cluster_total_used_bytes{} | 100 | + + Scenario: "Test Write Throughput" + Given the following series: + | metrics | values | + | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.0"} | 500 500 500 | + | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.1"} | 500 120 110 | + Then Grafana panel `Write Throughput` with legend `EMPTY` shows: + | metrics | values | + | {} | 2 | + + Scenario: 
"Test Write IOPS" + Given the following series: + | metrics | values | + | ceph_osd_op_w{cluster="mycluster", osd="osd.0"} | 500 500 500 | + | ceph_osd_op_w{cluster="mycluster", osd="osd.1"} | 500 120 110 | + Then Grafana panel `Write IOPS` with legend `EMPTY` shows: + | metrics | values | + | {} | 2 | + + Scenario: "Test Read Throughput" + Given the following series: + | metrics | values | + | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.0"} | 500 500 500 | + | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.1"} | 500 120 110 | + Then Grafana panel `Read Throughput` with legend `EMPTY` shows: + | metrics | values | + | {} | 2 | + + Scenario: "Test Read IOPS" + Given the following series: + | metrics | values | + | ceph_osd_op_r{cluster="mycluster", osd="osd.0"} | 500 500 500 | + | ceph_osd_op_r{cluster="mycluster", osd="osd.1"} | 500 120 110 | + Then Grafana panel `Read IOPS` with legend `EMPTY` shows: + | metrics | values | + | {} | 2 | + + Scenario: "Test OSDs All" + Given the following series: + | metrics | values | + | ceph_osd_metadata{cluster="mycluster", osd="osd.0"} | 1 | + | ceph_osd_metadata{cluster="mycluster", osd="osd.2"} | 1 | + | ceph_osd_metadata{cluster="mycluster", osd="osd.3"} | 1 | + Then Grafana panel `OSDs` with legend `All` shows: + | metrics | values | + | {} | 3 | + + Scenario: "Test OSDs In" + Given the following series: + | metrics | values | + | ceph_osd_in{cluster="mycluster", osd="osd.0"} | 1 | + | ceph_osd_in{cluster="mycluster", osd="osd.1"} | 1 | + | ceph_osd_in{cluster="mycluster", osd="osd.2"} | 1 | + Then Grafana panel `OSDs` with legend `In` shows: + | metrics | values | + | {} | 3 | + + Scenario: "Test OSDs Out" + Given the following series: + | metrics | values | + | ceph_osd_in{cluster="mycluster", osd="osd.0"} | 1 | + | ceph_osd_in{cluster="mycluster", osd="osd.1"} | 0 | + | ceph_osd_in{cluster="mycluster", osd="osd.2"} | 0 | + Then Grafana panel `OSDs` with legend `Out` shows: + | metrics | values | + | {} | 
2 | + + Scenario: "Test OSDs Up" + Given the following series: + | metrics | values | + | ceph_osd_up{cluster="mycluster", osd="osd.0"} | 1 | + | ceph_osd_up{cluster="mycluster", osd="osd.1"} | 0 | + | ceph_osd_up{cluster="mycluster", osd="osd.2"} | 0 | + Then Grafana panel `OSDs` with legend `Up` shows: + | metrics | values | + | {} | 1 | + + Scenario: "Test OSDs Down" + Given the following series: + | metrics | values | + | ceph_osd_up{cluster="mycluster", osd="osd.0"} | 1 | + | ceph_osd_up{cluster="mycluster", osd="osd.1"} | 0 | + | ceph_osd_up{cluster="mycluster", osd="osd.2"} | 0 | + Then Grafana panel `OSDs` with legend `Down` shows: + | metrics | values | + | {} | 2 | + + Scenario: "Test MGRs Standby" + Given the following series: + | metrics | values | + | ceph_mgr_status{cluster="mycluster", osd="osd.0"} | 1 | + | ceph_mgr_status{cluster="mycluster", osd="osd.1"} | 0 | + | ceph_mgr_status{cluster="mycluster", osd="osd.2"} | 0 | + Then Grafana panel `MGRs` with legend `Standby` shows: + | metrics | values | + | {} | 2 | + + Scenario: "Test MGRs Active" + Given the following series: + | metrics | values | + | ceph_mgr_status{cluster="mycluster", osd="osd.0"} | 1 | + | ceph_mgr_status{cluster="mycluster", osd="osd.1"} | 0 | + Then Grafana panel `MGRs` with legend `Active` shows: + | metrics | values | + | {} | 1 | + + Scenario: "Test Monitors Total" + Given the following series: + | metrics | values | + | ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 | + | ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 | + | ceph_mon_quorum_status{cluster="mycluster", osd="osd.2"} | 0 | + Then Grafana panel `Monitors` with legend `Total` shows: + | metrics | values | + | {} | 3 | + + Scenario: "Test Monitors In Quorum" + Given the following series: + | metrics | values | + | ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 | + | ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 | + | 
ceph_mon_quorum_status{cluster="mycluster", osd="osd.2"} | 0 | + Then Grafana panel `Monitors` with legend `In Quorum` shows: + | metrics | values | + | {} | 1 | + + Scenario: "Test Monitors out of Quorum" + Given the following series: + | metrics | values | + | ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 | + | ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 | + | ceph_mon_quorum_status{cluster="mycluster", osd="osd.2"} | 0 | + Then Grafana panel `Monitors` with legend `MONs out of Quorum` shows: + | metrics | values | + | {} | 2 | + + Scenario: "Test Total Capacity" + Given the following series: + | metrics | values | + | ceph_cluster_total_bytes{cluster="mycluster", osd="osd.0"} | 100 | + Then Grafana panel `Capacity` with legend `Total Capacity` shows: + | metrics | values | + | ceph_cluster_total_bytes{cluster="mycluster", osd="osd.0"} | 100 | + + Scenario: "Test Capacity Used" + Given the following series: + | metrics | values | + | ceph_cluster_total_used_bytes{cluster="mycluster", osd="osd.0"} | 100 | + Then Grafana panel `Capacity` with legend `Used` shows: + | metrics | values | + | ceph_cluster_total_used_bytes{cluster="mycluster", osd="osd.0"} | 100 | + + Scenario: "Test Cluster Throughput Write" + Given the following series: + | metrics | values | + | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.0"} | 1000 1000| + | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.1"} | 2000 1500 | + Then Grafana panel `Cluster Throughput` with legend `Write` shows: + | metrics | values | + | {} | 25 | + + Scenario: "Test Cluster Throughput Read" + Given the following series: + | metrics | values | + | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.0"} | 1000 1000| + | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.1"} | 2000 1500 | + Then Grafana panel `Cluster Throughput` with legend `Read` shows: + | metrics | values | + | {} | 25 | + + Scenario: "Test IOPS Read" + Given the following series: + | metrics |
values | + | ceph_osd_op_r{cluster="mycluster", osd="osd.0"} | 1000 1000| + | ceph_osd_op_r{cluster="mycluster", osd="osd.1"} | 2000 1500 | + Then Grafana panel `IOPS` with legend `Read` shows: + | metrics | values | + | {} | 25 | + + Scenario: "Test IOPS Write" + Given the following series: + | metrics | values | + | ceph_osd_op_w{cluster="mycluster", osd="osd.0"} | 1000 1000| + | ceph_osd_op_w{cluster="mycluster", osd="osd.1"} | 2000 1500 | + Then Grafana panel `IOPS` with legend `Write` shows: + | metrics | values | + | {} | 25 | + + Scenario: "Test Pool Used Bytes" + Given the following series: + | metrics | values | + | ceph_pool_bytes_used{cluster="mycluster", pool_id="1"} | 10000 | + | ceph_pool_bytes_used{cluster="mycluster", pool_id="2"} | 20000 | + | ceph_pool_bytes_used{cluster="mycluster", pool_id="3"} | 30000 | + | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 2000 | + | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 4000 | + | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 6000 | + Then Grafana panel `Pool Used Bytes` with legend `{{name}}` shows: + | metrics | values | + | {cluster="mycluster", name="pool1", pool_id="1"} | 20000000 | + | {cluster="mycluster", name="pool2", pool_id="2"} | 80000000 | + | {cluster="mycluster", name="pool3", pool_id="3"} | 180000000 | + + Scenario: "Test Pool Used RAW Bytes" + Given the following series: + | metrics | values | + | ceph_pool_stored_raw{cluster="mycluster", pool_id="1"} | 10000 | + | ceph_pool_stored_raw{cluster="mycluster", pool_id="2"} | 20000 | + | ceph_pool_stored_raw{cluster="mycluster", pool_id="3"} | 30000 | + | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 2000 | + | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 4000 | + | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 6000 | + Then Grafana panel `Pool Used RAW Bytes` with legend `{{name}}` shows: + | metrics | 
values | + | {cluster="mycluster", name="pool1", pool_id="1"} | 20000000 | + | {cluster="mycluster", name="pool2", pool_id="2"} | 80000000 | + | {cluster="mycluster", name="pool3", pool_id="3"} | 180000000 | + + Scenario: "Test Pool Objects Quota" + Given the following series: + | metrics | values | + | ceph_pool_quota_objects{cluster="mycluster", pool_id="1"} | 10 | + | ceph_pool_quota_objects{cluster="mycluster", pool_id="2"} | 20 | + | ceph_pool_quota_objects{cluster="mycluster", pool_id="3"} | 30 | + | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 10 | + | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 15 | + | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 15 | + Then Grafana panel `Pool Objects Quota` with legend `{{name}}` shows: + | metrics | values | + | {cluster="mycluster", name="pool1", pool_id="1"} | 100 | + | {cluster="mycluster", name="pool2", pool_id="2"} | 300 | + | {cluster="mycluster", name="pool3", pool_id="3"} | 450| + + Scenario: "Test Pool Quota Bytes" + Given the following series: + | metrics | values | + | ceph_pool_quota_bytes{cluster="mycluster", pool_id="1"} | 100 | + | ceph_pool_quota_bytes{cluster="mycluster", pool_id="2"} | 200 | + | ceph_pool_quota_bytes{cluster="mycluster", pool_id="3"} | 300 | + | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 100 | + | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 150 | + | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 150 | + Then Grafana panel `Pool Quota Bytes` with legend `{{name}}` shows: + | metrics | values | + | {cluster="mycluster", name="pool1", pool_id="1"} | 10000 | + | {cluster="mycluster", name="pool2", pool_id="2"} | 30000 | + | {cluster="mycluster", name="pool3", pool_id="3"} | 45000 | + + Scenario: "Test Objects Per Pool" + Given the following series: + | metrics | values | + | ceph_pool_objects{cluster="mycluster", pool_id="1"} | 100 | + | 
ceph_pool_objects{cluster="mycluster", pool_id="2"} | 200 | + | ceph_pool_objects{cluster="mycluster", pool_id="3"} | 300 | + | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 100 | + | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 150 | + | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 150 | + Then Grafana panel `Objects Per Pool` with legend `{{name}}` shows: + | metrics | values | + | {cluster="mycluster", name="pool1", pool_id="1"} | 10000 | + | {cluster="mycluster", name="pool2", pool_id="2"} | 30000 | + | {cluster="mycluster", name="pool3", pool_id="3"} | 45000| + + Scenario: "Test OSD Type Count" + Given the following series: + | metrics | values | + | ceph_pool_objects{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pool_objects{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `OSD Type Count` with legend `Total` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Backfill Toofull" + Given the following series: + | metrics | values | + | ceph_pg_backfill_toofull{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_backfill_toofull{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Backfill Toofull` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Remapped" + Given the following series: + | metrics | values | + | ceph_pg_remapped{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_remapped{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Remapped` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Backfill" + Given the following series: + | metrics | values | + | ceph_pg_backfill{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_backfill{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Backfill` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Peered" + Given the 
following series: + | metrics | values | + | ceph_pg_peered{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_peered{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Peered` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Down" + Given the following series: + | metrics | values | + | ceph_pg_down{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_down{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Down` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Repair" + Given the following series: + | metrics | values | + | ceph_pg_repair{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_repair{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Repair` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Recovering" + Given the following series: + | metrics | values | + | ceph_pg_recovering{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_recovering{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Recovering` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Deep" + Given the following series: + | metrics | values | + | ceph_pg_deep{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_deep{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Deep` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Wait Backfill" + Given the following series: + | metrics | values | + | ceph_pg_wait_backfill{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_wait_backfill{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Wait Backfill` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Creating" + Given the following series: + | metrics | values | + | ceph_pg_creating{cluster="mycluster", osd="osd.0"} | 10 | + | 
ceph_pg_creating{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Creating` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Forced Recovery" + Given the following series: + | metrics | values | + | ceph_pg_forced_recovery{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_forced_recovery{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Forced Recovery` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Forced Backfill" + Given the following series: + | metrics | values | + | ceph_pg_forced_backfill{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_forced_backfill{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Forced Backfill` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Incomplete" + Given the following series: + | metrics | values | + | ceph_pg_incomplete{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_incomplete{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Incomplete` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test PGs State Undersized" + Given the following series: + | metrics | values | + | ceph_pg_undersized{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_undersized{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `PGs State` with legend `Undersized` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test Stuck PGs Undersized" + Given the following series: + | metrics | values | + | ceph_pg_undersized{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_undersized{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `Stuck PGs` with legend `Undersized` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test Stuck PGs Stale" + Given the following series: + | metrics | values | + | ceph_pg_stale{cluster="mycluster", osd="osd.0"} | 10 | + | 
ceph_pg_stale{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `Stuck PGs` with legend `Stale` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test Stuck PGs Degraded" + Given the following series: + | metrics | values | + | ceph_pg_degraded{cluster="mycluster", osd="osd.0"} | 10 | + | ceph_pg_degraded{cluster="mycluster", osd="osd.1"} | 20 | + Then Grafana panel `Stuck PGs` with legend `Degraded` shows: + | metrics | values | + | {} | 30 | + + Scenario: "Test Recovery Operations" + Given the following series: + | metrics | values | + | ceph_osd_recovery_ops{cluster="mycluster", osd="osd.0"}| 250 200 | + | ceph_osd_recovery_ops{cluster="mycluster", osd="osd.1"} | 800 100 | + When variable `interval` is `120s` + Then Grafana panel `Recovery Operations` with legend `OPS` shows: + | metrics | values | + | {} | 5 | + + Scenario: "Test Ceph Versions OSD" + Given the following series: + | metrics | values | + | ceph_osd_metadata{cluster="mycluster", osd="osd.0"}| 17 | + Then Grafana panel `Ceph Versions` with legend `OSD Services` shows: + | metrics | values | + | {} | 1 | + + Scenario: "Test Ceph Versions Mon" + Given the following series: + | metrics | values | + | ceph_mon_metadata{cluster="mycluster", osd="osd.0"}| 17 | + Then Grafana panel `Ceph Versions` with legend `Mon Services` shows: + | metrics | values | + | {} | 1 | + + Scenario: "Test Ceph Versions MDS" + Given the following series: + | metrics | values | + | ceph_mds_metadata{cluster="mycluster", osd="osd.0"}| 17 | + Then Grafana panel `Ceph Versions` with legend `MDS Services` shows: + | metrics | values | + | {} | 1 | + + Scenario: "Test Ceph Versions RGW" + Given the following series: + | metrics | values | + | ceph_rgw_metadata{cluster="mycluster", osd="osd.0"}| 17 | + Then Grafana panel `Ceph Versions` with legend `RGW Services` shows: + | metrics | values | + | {} | 1 | + + Scenario: "Test Ceph Versions MGR" + Given the following series: + | metrics | values | + | 
ceph_mgr_metadata{cluster="mycluster", osd="osd.0"}| 17 | + Then Grafana panel `Ceph Versions` with legend `MGR Services` shows: + | metrics | values | + | {} | 1 | \ No newline at end of file diff --git a/monitoring/ceph-mixin/tests_dashboards/util.py b/monitoring/ceph-mixin/tests_dashboards/util.py index 835dedee7eb59..4213ff6d5a1e1 100644 --- a/monitoring/ceph-mixin/tests_dashboards/util.py +++ b/monitoring/ceph-mixin/tests_dashboards/util.py @@ -43,6 +43,7 @@ def add_dashboard_queries(data: Dict[str, Any], dashboard_data: Dict[str, Any], if 'panels' not in dashboard_data: return error = 0 + panel_ids_in_file = set() for panel in dashboard_data['panels']: if ( 'title' in panel @@ -54,18 +55,13 @@ def add_dashboard_queries(data: Dict[str, Any], dashboard_data: Dict[str, Any], title = panel['title'] legend_format = target['legendFormat'] if 'legendFormat' in target else "" query_id = f'{title}-{legend_format}' - if query_id in data['queries']: - # NOTE: If two or more panels have the same name and legend it - # might suggest a refactoring is needed or add something else - # to identify each query. - conflict_file = Path(data['queries'][query_id]['path']).name - file = Path(path).name + if query_id in panel_ids_in_file: cprint((f'ERROR: Query in panel "{title}" with legend "{legend_format}"' - f' already exists. Conflict "{conflict_file}" ' - f'with: "{file}"'), 'red') + f' already exists in the same file: "{path}"'), 'red') error = 1 data['queries'][query_id] = {'query': target['expr'], 'path': path} data['stats'][path]['total'] += 1 + panel_ids_in_file.add(query_id) if error: raise ValueError('Missing legend_format in queries, please add a proper value.')