From 0d0e9df38102f8e9d8b494936304cebf552287e7 Mon Sep 17 00:00:00 2001 From: zhangxu Date: Fri, 17 Nov 2023 07:48:29 +0800 Subject: [PATCH 1/2] add more go runtime metrics --- go.mod | 10 +- go.sum | 19 +-- pkg/util/metric/mometric/metric.go | 7 +- .../metric/v2/dashboard/grafana_dashboard.go | 6 +- .../v2/dashboard/grafana_dashboard_runtime.go | 147 ++++++++++++++++++ pkg/util/metric/v2/metrics.go | 16 +- 6 files changed, 187 insertions(+), 18 deletions(-) create mode 100644 pkg/util/metric/v2/dashboard/grafana_dashboard_runtime.go diff --git a/go.mod b/go.mod index 56953f86a8e5..48fba7e3971d 100644 --- a/go.mod +++ b/go.mod @@ -45,8 +45,8 @@ require ( github.com/pkg/errors v0.9.1 github.com/plar/go-adaptive-radix-tree v1.0.5 github.com/prashantv/gostub v1.1.0 - github.com/prometheus/client_golang v1.15.1 - github.com/prometheus/client_model v0.4.0 + github.com/prometheus/client_golang v1.17.0 + github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 github.com/robfig/cron/v3 v3.0.1 github.com/samber/lo v1.38.1 github.com/shirou/gopsutil/v3 v3.22.4 @@ -134,9 +134,9 @@ require ( github.com/pingcap/errors v0.11.5-0.20201029093017-5a7df2af2ac7 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect - github.com/prometheus/common v0.42.0 // indirect - github.com/prometheus/procfs v0.9.0 // indirect - github.com/rogpeppe/go-internal v1.9.0 // indirect + github.com/prometheus/common v0.44.0 // indirect + github.com/prometheus/procfs v0.11.1 // indirect + github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/smartystreets/assertions v1.13.1 // indirect diff --git a/go.sum b/go.sum index 5de4ae87bb9d..9856028c8cd4 100644 --- a/go.sum +++ b/go.sum @@ -414,22 +414,23 @@ github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= -github.com/prometheus/client_golang v1.15.1 h1:8tXpTmJbyH5lydzFPoxSIJ0J46jdh3tylbvM1xCv0LI= -github.com/prometheus/client_golang v1.15.1/go.mod h1:e9yaBhRPU2pPNsZwE+JdQl0KEt1N9XgF6zxWmaC0xOk= +github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q= +github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= -github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= -github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM= -github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc= -github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= -github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= +github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 h1:v7DLqVdK4VrYkVD5diGdl4sxJurKJEMnODWRJlxV9oM= +github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= +github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= +github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= +github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI= +github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o= -github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= diff --git a/pkg/util/metric/mometric/metric.go b/pkg/util/metric/mometric/metric.go index b4e3cc5848d3..109ab2c37992 100644 --- a/pkg/util/metric/mometric/metric.go +++ b/pkg/util/metric/mometric/metric.go @@ -23,6 +23,9 @@ import ( "sync/atomic" "time" + "github.com/matrixorigin/matrixone/pkg/common/mpool" + v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2" + "github.com/matrixorigin/matrixone/pkg/common/runtime" "github.com/matrixorigin/matrixone/pkg/util/metric" "github.com/matrixorigin/matrixone/pkg/util/metric/stats" @@ -115,7 +118,7 @@ func InitMetric(ctx context.Context, ieFactory func() ie.InternalExecutor, SV *c if metric.EnableExportToProm() { // http.HandleFunc("/query", makeDebugHandleFunc(ieFactory)) mux := http.NewServeMux() - mux.Handle("/metrics", promhttp.HandlerFor(prom.DefaultGatherer, promhttp.HandlerOpts{})) + mux.Handle("/metrics", promhttp.HandlerFor(v2.GetPrometheusGatherer(), promhttp.HandlerOpts{})) addr := fmt.Sprintf(":%d", SV.StatusPort) statusSvr = &statusServer{Server: &http.Server{Addr: addr, Handler: mux}} statusSvr.Add(1) @@ -176,7 +179,7 @@ func StopMetricSync() { } func mustRegiterToProm(collector prom.Collector) { - if err := prom.Register(collector); err != nil { + if err := v2.GetPrometheusRegistry().Register(collector); err != nil { // err is either registering a collector more than once or metrics have duplicate description. // in any case, we respect the existing collectors in the prom registry logutil.Debugf("[Metric] register to prom register: %v", err) diff --git a/pkg/util/metric/v2/dashboard/grafana_dashboard.go b/pkg/util/metric/v2/dashboard/grafana_dashboard.go index bcaab532ccbe..a480c448c737 100644 --- a/pkg/util/metric/v2/dashboard/grafana_dashboard.go +++ b/pkg/util/metric/v2/dashboard/grafana_dashboard.go @@ -94,6 +94,10 @@ func (c *DashboardCreator) Create() error { return err } + if err := c.initRuntimeDashboard(); err != nil { + return err + } + return nil } @@ -167,7 +171,7 @@ func (c *DashboardCreator) getHistogramWithExtraBy( legend := fmt.Sprintf("P%.2f%%", percent*100) if len(extraBy) > 0 { query = fmt.Sprintf("histogram_quantile(%f, sum(rate(%s[$interval])) by (le, %s))", percent, metric, extraBy) - legend = fmt.Sprintf("{{ name }}(P%.2f%%)", percent*100) + legend = fmt.Sprintf("{{ "+extraBy+" }}(P%.2f%%)", percent*100) } queries = append(queries, query) legends = append(legends, legend) diff --git a/pkg/util/metric/v2/dashboard/grafana_dashboard_runtime.go b/pkg/util/metric/v2/dashboard/grafana_dashboard_runtime.go new file mode 100644 index 000000000000..6fcba5571c50 --- /dev/null +++ b/pkg/util/metric/v2/dashboard/grafana_dashboard_runtime.go @@ -0,0 +1,147 @@ +// Copyright 2023 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dashboard + +import ( + "context" + + "github.com/K-Phoen/grabana/axis" + "github.com/K-Phoen/grabana/dashboard" +) + +func (c *DashboardCreator) initRuntimeDashboard() error { + folder, err := c.createFolder(moFolderName) + if err != nil { + return err + } + + build, err := dashboard.New( + "Go Runtime Metrics", + c.withRowOptions( + c.initMemoryRow(), + c.initGCRow(), + c.initGoroutineRow(), + )...) + if err != nil { + return err + } + _, err = c.cli.UpsertDashboard(context.Background(), folder, build) + return err +} + +func (c *DashboardCreator) initGCRow() dashboard.Option { + return dashboard.Row( + "Go GC Status", + c.getHistogramWithExtraBy( + "STW duration", + c.getMetricWithFilter(`go_gc_pauses_seconds_bucket`, ``), + []float64{0.8, 0.90, 0.95, 0.99}, + 12, + c.by, + axis.Unit("s"), + axis.Min(0)), + ) +} + +func (c *DashboardCreator) initGoroutineRow() dashboard.Option { + return dashboard.Row( + "Goroutine Status", + c.withGraph( + "Goroutine count", + 6, + `sum(`+c.getMetricWithFilter("go_goroutines", "")+`) by (`+c.by+`)`, + "{{ "+c.by+" }}"), + + c.getHistogramWithExtraBy( + "Schedule latency duration", + c.getMetricWithFilter(`go_sched_latencies_seconds_bucket`, ``), + []float64{0.8, 0.90, 0.95, 0.99}, + 6, + c.by, + axis.Unit("s"), + axis.Min(0)), + ) +} + +func (c *DashboardCreator) initMemoryRow() dashboard.Option { + return dashboard.Row( + "Memory Status", + c.withGraph( + "Live Objects", + 3, + `sum(`+c.getMetricWithFilter("go_gc_heap_objects_objects", "")+`) by (`+c.by+`)`, + "{{ "+c.by+" }}"), + + c.withGraph( + "Free and ready to return system", + 3, + `sum(`+c.getMetricWithFilter("go_memory_classes_heap_free_bytes", "")+`) by (`+c.by+`)`, + "{{ "+c.by+" }}", + axis.Unit("bytes"), + axis.Min(0)), + + c.withGraph( + "Dead objects and not marked free live objects", + 3, + `sum(`+c.getMetricWithFilter("go_memory_classes_heap_objects_bytes", "")+`) by (`+c.by+`)`, + "{{ "+c.by+" }}", + axis.Unit("bytes"), + axis.Min(0)), + + c.withGraph( + "Released to system", + 3, + `sum(`+c.getMetricWithFilter("go_memory_classes_heap_released_bytes", "")+`) by (`+c.by+`)`, + "{{ "+c.by+" }}", + axis.Unit("bytes"), + axis.Min(0)), + + c.withGraph( + "Heap Allocation Bytes/s", + 3, + `sum(rate(`+c.getMetricWithFilter("go_gc_heap_allocs_bytes_total", "")+`[$interval])) by (`+c.by+`)`, + "{{ "+c.by+" }}", + axis.Unit("bytes"), + axis.Min(0)), + + c.withGraph( + "Heap Free Bytes/s", + 3, + `sum(rate(`+c.getMetricWithFilter("go_gc_heap_frees_bytes_total", "")+`[$interval])) by (`+c.by+`)`, + "{{ "+c.by+" }}", + axis.Unit("bytes"), + axis.Min(0)), + + c.withGraph( + "Heap Allocation Object/s", + 3, + `sum(rate(`+c.getMetricWithFilter("go_gc_heap_allocs_objects_total", "")+`[$interval])) by (`+c.by+`)`, + "{{ "+c.by+" }}"), + + c.withGraph( + "Heap Free Object/s", + 3, + `sum(rate(`+c.getMetricWithFilter("go_gc_heap_frees_objects_total", "")+`[$interval])) by (`+c.by+`)`, + "{{ "+c.by+" }}"), + + c.getHistogram( + "Allocation bytes size", + c.getMetricWithFilter(`go_gc_heap_allocs_by_size_bytes_bucket`, ``), + []float64{0.8, 0.90, 0.95, 0.99}, + 12, + axis.Unit("bytes"), + axis.Min(0)), + ) +} diff --git a/pkg/util/metric/v2/metrics.go b/pkg/util/metric/v2/metrics.go index ae2958c9ac1d..9517c6fb07db 100644 --- a/pkg/util/metric/v2/metrics.go +++ b/pkg/util/metric/v2/metrics.go @@ -16,13 +16,27 @@ package v2 import ( "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" ) var ( - registry = prometheus.DefaultRegisterer + registry = prometheus.NewRegistry() ) +func GetPrometheusRegistry() prometheus.Registerer { + return registry +} + +func GetPrometheusGatherer() prometheus.Gatherer { + return registry +} + func init() { + registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) + registry.MustRegister(collectors.NewGoCollector( + collectors.WithGoCollectorRuntimeMetrics(collectors.MetricsAll), + )) + initFileServiceMetrics() initLogtailMetrics() initTxnMetrics() From ee0a2d942ddf8c206ce5c15e70b6f0490852dc1a Mon Sep 17 00:00:00 2001 From: zhangxu Date: Fri, 17 Nov 2023 09:57:15 +0800 Subject: [PATCH 2/2] fix --- pkg/util/metric/mometric/metric.go | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/pkg/util/metric/mometric/metric.go b/pkg/util/metric/mometric/metric.go index 109ab2c37992..ca5d75f4ac4f 100644 --- a/pkg/util/metric/mometric/metric.go +++ b/pkg/util/metric/mometric/metric.go @@ -23,18 +23,15 @@ import ( "sync/atomic" "time" - "github.com/matrixorigin/matrixone/pkg/common/mpool" - v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2" - - "github.com/matrixorigin/matrixone/pkg/common/runtime" - "github.com/matrixorigin/matrixone/pkg/util/metric" - "github.com/matrixorigin/matrixone/pkg/util/metric/stats" - "github.com/matrixorigin/matrixone/pkg/common/moerr" + "github.com/matrixorigin/matrixone/pkg/common/runtime" "github.com/matrixorigin/matrixone/pkg/config" "github.com/matrixorigin/matrixone/pkg/logutil" "github.com/matrixorigin/matrixone/pkg/util/export/table" ie "github.com/matrixorigin/matrixone/pkg/util/internalExecutor" + "github.com/matrixorigin/matrixone/pkg/util/metric" + "github.com/matrixorigin/matrixone/pkg/util/metric/stats" + v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2" prom "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -277,9 +274,9 @@ type descExtra struct { func newDescExtra(desc *prom.Desc) *descExtra { str := desc.String()[14:] // strip Desc{fqName: " fqName := str[:strings.Index(str, "\"")] - str = str[strings.Index(str, "variableLabels: [")+17:] // spot varlbl list - str = str[:strings.Index(str, "]")] - varLblCnt := len(strings.Split(str, " ")) + str = str[strings.Index(str, "variableLabels: {")+17:] // spot varlbl list + str = str[:strings.Index(str, "}")] + varLblCnt := len(strings.Split(str, ",")) labels := prom.MakeLabelPairs(desc, make([]string, varLblCnt)) return &descExtra{orig: desc, fqName: fqName, labels: labels} }