Skip to content

Commit

Permalink
Merge pull request ceph#43707 from BenoitKnecht/ceph-mgr-service-id
Browse files Browse the repository at this point in the history
mgr: Fix ceph_daemon label in ceph_rgw_* metrics

Reviewed-by: Aashish Sharma <aasharma@redhat.com>
Reviewed-by: Ernesto Puerta <epuertat@redhat.com>
Reviewed-by: Pere Diaz Bou <pdiazbou@redhat.com>
  • Loading branch information
epuertat committed Feb 2, 2022
2 parents 8505861 + 2daaa05 commit c47ace9
Show file tree
Hide file tree
Showing 9 changed files with 254 additions and 64 deletions.
2 changes: 1 addition & 1 deletion monitoring/grafana/dashboards/hosts-overview.json
Original file line number Diff line number Diff line change
Expand Up @@ -796,7 +796,7 @@
"multi": false,
"name": "rgw_hosts",
"options": [ ],
"query": "label_values(ceph_rgw_qlen, ceph_daemon)",
"query": "label_values(ceph_rgw_metadata, ceph_daemon)",
"refresh": 1,
"regex": "rgw.(.*)",
"sort": 1,
Expand Down
42 changes: 21 additions & 21 deletions monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
addTemplateSchema('mds_hosts', '$datasource', 'label_values(ceph_mds_inodes, ceph_daemon)', 1, true, 1, null, 'mds.(.*)')
)
.addTemplate(
addTemplateSchema('rgw_hosts', '$datasource', 'label_values(ceph_rgw_qlen, ceph_daemon)', 1, true, 1, null, 'rgw.(.*)')
addTemplateSchema('rgw_hosts', '$datasource', 'label_values(ceph_rgw_metadata, ceph_daemon)', 1, true, 1, null, 'rgw.(.*)')
)
.addPanels([
HostsOverviewSingleStatPanel(
Expand Down Expand Up @@ -450,7 +450,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
type='panel', id='graph', name='Graph', version='5.0.0'
)
.addTemplate(
addTemplateSchema('rgw_servers', '$datasource', 'label_values(ceph_rgw_req, ceph_daemon)', 1, true, 1, '', '')
addTemplateSchema('rgw_servers', '$datasource', 'label_values(ceph_rgw_metadata, ceph_daemon)', 1, true, 1, '', '')
)
.addTemplate(
addTemplateSchema('code', '$datasource', 'label_values(haproxy_server_http_responses_total{instance=~"$ingress_service"}, code)', 1, true, 1, 'HTTP Code', '')
Expand All @@ -468,14 +468,14 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
'',
's',
'short',
'rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])',
'rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata',
'GET AVG',
0, 1, 8, 7
)
.addTargets(
[
addTargetSchema(
'rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])',
'rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata',
1,
'time_series',
'PUT AVG'
Expand All @@ -485,7 +485,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
'',
'none',
'short',
'sum by(rgw_host) (label_replace(rate(ceph_rgw_req[30s]), "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"))',
'sum by (rgw_host) (label_replace(rate(ceph_rgw_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"))',
'{{rgw_host}}',
8, 1, 7, 7
),
Expand All @@ -494,7 +494,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
'Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts',
's',
'short',
'label_replace(rate(ceph_rgw_get_initial_lat_sum[30s]),"rgw_host","$1","ceph_daemon","rgw.(.*)") / \nlabel_replace(rate(ceph_rgw_get_initial_lat_count[30s]),"rgw_host","$1","ceph_daemon","rgw.(.*)")',
'label_replace(\n rate(ceph_rgw_get_initial_lat_sum[30s]) /\n rate(ceph_rgw_get_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n"rgw_host", "$1", "ceph_daemon", "rgw.(.*)")',
'{{rgw_host}}',
15, 1, 6, 7
),
Expand All @@ -520,7 +520,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
'Total bytes transferred in/out through get/put operations, by radosgw instance',
'bytes',
'short',
'sum by(rgw_host) (\n (label_replace(rate(ceph_rgw_get_b[30s]), "rgw_host","$1","ceph_daemon","rgw.(.*)")) + \n (label_replace(rate(ceph_rgw_put_b[30s]), "rgw_host","$1","ceph_daemon","rgw.(.*)"))\n)',
'label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b[30s]) + \n rate(ceph_rgw_put_b[30s])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, "rgw_host", "$1", "ceph_daemon", "rgw.(.*)")',
'{{rgw_host}}',
8, 8, 7, 6
),
Expand All @@ -529,7 +529,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
'Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts',
's',
'short',
'label_replace(rate(ceph_rgw_put_initial_lat_sum[30s]),"rgw_host","$1","ceph_daemon","rgw.(.*)") / \nlabel_replace(rate(ceph_rgw_put_initial_lat_count[30s]),"rgw_host","$1","ceph_daemon","rgw.(.*)")',
'label_replace(\n rate(ceph_rgw_put_initial_lat_sum[30s]) /\n rate(ceph_rgw_put_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n"rgw_host", "$1", "ceph_daemon", "rgw.(.*)")',
'{{rgw_host}}',
15, 8, 6, 6
),
Expand Down Expand Up @@ -659,7 +659,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
)
.addTemplate(
addTemplateSchema('rgw_servers', '$datasource', 'label_values(ceph_rgw_req, ceph_daemon)', 1, true, 1, '', '')
addTemplateSchema('rgw_servers', '$datasource', 'label_values(ceph_rgw_metadata, ceph_daemon)', 1, true, 1, '', '')
)
.addPanels([
addRowSchema(false, true, 'RGW Host Detail : $rgw_servers') + {gridPos: {x: 0, y: 0, w: 24, h: 1}},
Expand All @@ -669,8 +669,8 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
'',
's',
'short',
'sum by (ceph_daemon) (rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~"($rgw_servers)"}[30s]) / rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~"($rgw_servers)"}[30s]))',
'sum by (ceph_daemon)(rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~"($rgw_servers)"}[30s]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~"($rgw_servers)"}[30s]))',
'sum by (instance_id) (rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
'sum by (instance_id) (rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
'GET {{ceph_daemon}}',
'PUT {{ceph_daemon}}',
0, 1, 6, 8
Expand All @@ -681,8 +681,8 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
'',
'bytes',
'short',
'rate(ceph_rgw_get_b{ceph_daemon=~"$rgw_servers"}[30s])',
'rate(ceph_rgw_put_b{ceph_daemon=~"$rgw_servers"}[30s])',
'rate(ceph_rgw_get_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
'rate(ceph_rgw_put_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
'GETs {{ceph_daemon}}',
'PUTs {{ceph_daemon}}',
6, 1, 7, 8
Expand All @@ -693,22 +693,22 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
'',
'short',
'short',
'rate(ceph_rgw_failed_req{ceph_daemon=~"$rgw_servers"}[30s])',
'rate(ceph_rgw_get{ceph_daemon=~"$rgw_servers"}[30s])',
'rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
'rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
'Requests Failed {{ceph_daemon}}',
'GETs {{ceph_daemon}}',
13, 1, 7, 8
)
.addTargets(
[
addTargetSchema(
'rate(ceph_rgw_put{ceph_daemon=~"$rgw_servers"}[30s])',
'rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
1,
'time_series',
'PUTs {{ceph_daemon}}'
),
addTargetSchema(
'rate(ceph_rgw_req{ceph_daemon=~"$rgw_servers"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~"$rgw_servers"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~"$rgw_servers"}[30s]))',
'(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
1,
'time_series',
'Other {{ceph_daemon}}'
Expand All @@ -722,10 +722,10 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
'Workload Breakdown',
'current'
)
.addTarget(addTargetSchema('rate(ceph_rgw_failed_req{ceph_daemon=~"$rgw_servers"}[30s])', 1, 'time_series', 'Failures {{ceph_daemon}}'))
.addTarget(addTargetSchema('rate(ceph_rgw_get{ceph_daemon=~"$rgw_servers"}[30s])', 1, 'time_series', 'GETs {{ceph_daemon}}'))
.addTarget(addTargetSchema('rate(ceph_rgw_put{ceph_daemon=~"$rgw_servers"}[30s])', 1, 'time_series', 'PUTs {{ceph_daemon}}'))
.addTarget(addTargetSchema('rate(ceph_rgw_req{ceph_daemon=~"$rgw_servers"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~"$rgw_servers"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~"$rgw_servers"}[30s]))', 1, 'time_series', 'Other (DELETE,LIST) {{ceph_daemon}}')) + {gridPos: {x: 20, y: 1, w: 4, h: 8}}
.addTarget(addTargetSchema('rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', 1, 'time_series', 'Failures {{ceph_daemon}}'))
.addTarget(addTargetSchema('rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', 1, 'time_series', 'GETs {{ceph_daemon}}'))
.addTarget(addTargetSchema('rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', 1, 'time_series', 'PUTs {{ceph_daemon}}'))
.addTarget(addTargetSchema('(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', 1, 'time_series', 'Other (DELETE,LIST) {{ceph_daemon}}')) + {gridPos: {x: 20, y: 1, w: 4, h: 8}}
])
}
{
Expand Down
26 changes: 13 additions & 13 deletions monitoring/grafana/dashboards/radosgw-detail.json
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,14 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by (ceph_daemon) (rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))",
"expr": "sum by (instance_id) (rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GET {{ceph_daemon}}",
"refId": "A"
},
{
"expr": "sum by (ceph_daemon)(rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))",
"expr": "sum by (instance_id) (rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUT {{ceph_daemon}}",
Expand Down Expand Up @@ -196,14 +196,14 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_rgw_get_b{ceph_daemon=~\"$rgw_servers\"}[30s])",
"expr": "rate(ceph_rgw_get_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "A"
},
{
"expr": "rate(ceph_rgw_put_b{ceph_daemon=~\"$rgw_servers\"}[30s])",
"expr": "rate(ceph_rgw_put_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
Expand Down Expand Up @@ -294,28 +294,28 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"$rgw_servers\"}[30s])",
"expr": "rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Requests Failed {{ceph_daemon}}",
"refId": "A"
},
{
"expr": "rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s])",
"expr": "rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "B"
},
{
"expr": "rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s])",
"expr": "rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
"refId": "C"
},
{
"expr": "rate(ceph_rgw_req{ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s]))",
"expr": "(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Other {{ceph_daemon}}",
Expand Down Expand Up @@ -384,28 +384,28 @@
"pieType": "pie",
"targets": [
{
"expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"$rgw_servers\"}[30s])",
"expr": "rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Failures {{ceph_daemon}}",
"refId": "A"
},
{
"expr": "rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s])",
"expr": "rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "B"
},
{
"expr": "rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s])",
"expr": "rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
"refId": "C"
},
{
"expr": "rate(ceph_rgw_req{ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s]))",
"expr": "(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}",
Expand Down Expand Up @@ -450,7 +450,7 @@
"multi": false,
"name": "rgw_servers",
"options": [ ],
"query": "label_values(ceph_rgw_req, ceph_daemon)",
"query": "label_values(ceph_rgw_metadata, ceph_daemon)",
"refresh": 1,
"regex": "",
"sort": 1,
Expand Down

0 comments on commit c47ace9

Please sign in to comment.