Skip to content

Commit

Permalink
feat(*): add ilm metrics
Browse files Browse the repository at this point in the history
Signed-off-by: iishabakaev <iishabakaev@gmail.com>
  • Loading branch information
iishabakaev committed Jun 8, 2022
1 parent 6bef1fc commit bb4b91c
Show file tree
Hide file tree
Showing 6 changed files with 438 additions and 0 deletions.
164 changes: 164 additions & 0 deletions collector/ilm_indices.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
// Copyright 2021 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collector

import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"path"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
)

// ilmMetric groups everything needed to emit the per-index ILM sample:
// the descriptor, the Prometheus value type and a value-mapping function.
type ilmMetric struct {
	// Type is the value type handed to prometheus.MustNewConstMetric.
	Type prometheus.ValueType
	// Desc is the descriptor the emitted samples are created against.
	Desc *prometheus.Desc
	// Value maps a raw number onto the sample value that is exported.
	Value func(timeMillis float64) float64
	// Labels is left unset by NewIlmIndicies.
	Labels []string
}

// IlmIndiciesCollector is a Prometheus collector exposing Index Lifecycle
// Management information for each index returned by Elasticsearch.
type IlmIndiciesCollector struct {
	// logger receives warnings about failed scrapes.
	logger log.Logger
	// client performs the HTTP requests against Elasticsearch.
	client *http.Client
	// url is the base URL the ILM endpoint path is joined onto.
	url *url.URL

	// up is set to 1 after a successful scrape and to 0 after a failed one.
	up prometheus.Gauge
	// totalScrapes and jsonParseFailures are the collector's self-metrics.
	totalScrapes, jsonParseFailures prometheus.Counter

	// ilmMetric describes the per-index sample emitted by Collect.
	ilmMetric ilmMetric
}

// defaultIlmIndicesMappingsLabels lists the label names attached to the
// per-index ILM status metric, in the order their values are supplied.
var defaultIlmIndicesMappingsLabels = []string{"index", "phase", "action", "step"}

// NewIlmIndicies returns an IlmIndiciesCollector that scrapes ILM data
// through client from the Elasticsearch instance at url and logs to logger.
// All metrics it creates live under the "ilm_index" subsystem.
func NewIlmIndicies(logger log.Logger, client *http.Client, url *url.URL) *IlmIndiciesCollector {
	const subsystem = "ilm_index"

	// Self-metrics describing the health of the collector itself.
	upGauge := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: prometheus.BuildFQName(namespace, subsystem, "up"),
		Help: "Was the last scrape of the ElasticSearch ILM endpoint successful.",
	})
	scrapeCounter := prometheus.NewCounter(prometheus.CounterOpts{
		Name: prometheus.BuildFQName(namespace, subsystem, "total_scrapes"),
		Help: "Current total ElasticSearch ILM scrapes.",
	})
	parseFailureCounter := prometheus.NewCounter(prometheus.CounterOpts{
		Name: prometheus.BuildFQName(namespace, subsystem, "json_parse_failures"),
		Help: "Number of errors while parsing JSON.",
	})

	// Per-index status gauge; the value function passes its input through.
	statusMetric := ilmMetric{
		Type: prometheus.GaugeValue,
		Desc: prometheus.NewDesc(
			prometheus.BuildFQName(namespace, subsystem, "status"),
			"Status of ILM policy for index",
			defaultIlmIndicesMappingsLabels, nil,
		),
		Value: func(timeMillis float64) float64 { return timeMillis },
	}

	return &IlmIndiciesCollector{
		logger:            logger,
		client:            client,
		url:               url,
		up:                upGauge,
		totalScrapes:      scrapeCounter,
		jsonParseFailures: parseFailureCounter,
		ilmMetric:         statusMetric,
	}
}

// Describe sends the descriptors of every metric this collector can emit:
// the per-index status metric first, then the three self-metrics.
func (i *IlmIndiciesCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		i.ilmMetric.Desc,
		i.up.Desc(),
		i.totalScrapes.Desc(),
		i.jsonParseFailures.Desc(),
	}
	for _, d := range descs {
		ch <- d
	}
}

// fetchAndDecodeIlm requests "/_all/_ilm/explain" relative to the collector's
// base URL and decodes the JSON response body into an IlmResponse.
//
// It returns an error when the request fails, when the status code is not
// 200, or when the body cannot be decoded; a decode failure also increments
// the jsonParseFailures counter. A failure to close the response body is only
// logged as a warning.
func (i *IlmIndiciesCollector) fetchAndDecodeIlm() (IlmResponse, error) {
	var ir IlmResponse

	// Work on a copy so the collector's base URL is never modified.
	u := *i.url
	u.Path = path.Join(u.Path, "/_all/_ilm/explain")

	res, err := i.client.Get(u.String())
	if err != nil {
		// %w keeps the underlying transport error reachable via errors.Is/As.
		return ir, fmt.Errorf("failed to get index stats from %s://%s:%s%s: %w",
			u.Scheme, u.Hostname(), u.Port(), u.Path, err)
	}

	defer func() {
		// Use a closure-local variable so the outer err is not overwritten.
		if closeErr := res.Body.Close(); closeErr != nil {
			_ = level.Warn(i.logger).Log(
				"msg", "failed to close response body",
				"err", closeErr,
			)
		}
	}()

	if res.StatusCode != http.StatusOK {
		return ir, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
	}

	if err := json.NewDecoder(res.Body).Decode(&ir); err != nil {
		i.jsonParseFailures.Inc()
		return ir, err
	}

	return ir, nil
}

// bool2int converts a boolean into a sample value: 1 for true, 0 for false.
func bool2int(managed bool) float64 {
	var sample float64
	if managed {
		sample = 1
	}
	return sample
}

// Collect fetches the ILM explain data from Elasticsearch and emits one
// status sample per index, labelled with the index name, phase, action and
// step. The sample value is 1 when the index is managed and 0 otherwise.
//
// The up, totalScrapes and jsonParseFailures self-metrics are always sent
// last, whether or not the scrape succeeded.
func (i *IlmIndiciesCollector) Collect(ch chan<- prometheus.Metric) {
	// Count every scrape attempt, including failed ones, so the counter
	// matches its help text and the behaviour of IlmStatusCollector.
	i.totalScrapes.Inc()
	defer func() {
		ch <- i.up
		ch <- i.totalScrapes
		ch <- i.jsonParseFailures
	}()

	ilmResp, err := i.fetchAndDecodeIlm()
	if err != nil {
		i.up.Set(0)
		_ = level.Warn(i.logger).Log(
			"msg", "failed to fetch and decode ILM stats",
			"err", err,
		)
		return
	}
	i.up.Set(1)

	for indexName, indexIlm := range ilmResp.Indices {
		ch <- prometheus.MustNewConstMetric(
			i.ilmMetric.Desc,
			i.ilmMetric.Type,
			i.ilmMetric.Value(bool2int(indexIlm.Managed)),
			indexName, indexIlm.Phase, indexIlm.Action, indexIlm.Step,
		)
	}
}
27 changes: 27 additions & 0 deletions collector/ilm_indices_response.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2021 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collector

// IlmResponse is the decoded top-level JSON document of an ILM explain
// request.
type IlmResponse struct {
	// Indices is decoded from the "indices" object; Collect uses each key
	// as the index label of the emitted sample.
	Indices map[string]IlmIndexResponse `json:"indices"`
}

// IlmIndexResponse is the decoded ILM explain entry for a single index.
type IlmIndexResponse struct {
	// Index is decoded from the "index" field.
	Index string `json:"index"`
	// Managed is decoded from the "managed" field.
	Managed bool `json:"managed"`
	// Phase is decoded from the "phase" field.
	Phase string `json:"phase"`
	// Action is decoded from the "action" field.
	Action string `json:"action"`
	// Step is decoded from the "step" field.
	Step string `json:"step"`
	// StepTimeMillis is decoded from the "step_time_millis" field.
	StepTimeMillis float64 `json:"step_time_millis"`
}
166 changes: 166 additions & 0 deletions collector/ilm_status.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright 2021 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collector

import (
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"path"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
)

// ilm_statuses enumerates the ILM operation modes for which the status
// collector emits a sample.
var ilm_statuses = []string{"STOPPED", "RUNNING", "STOPPING"}

// ilmStatusMetric groups everything needed to emit the ILM status samples:
// the descriptor, the Prometheus value type and a value function.
type ilmStatusMetric struct {
	// Type is the value type handed to prometheus.MustNewConstMetric.
	Type prometheus.ValueType
	// Desc is the descriptor the emitted samples are created against.
	Desc *prometheus.Desc
	// Value computes the sample value for one status given the response.
	Value func(ilm *IlmStatusResponse, status string) float64
	// Labels is left unset by NewIlmStatus.
	Labels func(status string) []string
}

// IlmStatusCollector is a Prometheus collector exposing the ILM operation
// mode reported by Elasticsearch.
type IlmStatusCollector struct {
	// logger receives warnings about failed scrapes.
	logger log.Logger
	// client performs the HTTP requests against Elasticsearch.
	client *http.Client
	// url is the base URL the ILM status path is joined onto.
	url *url.URL

	// up is set to 1 after a successful scrape and to 0 after a failed one.
	up prometheus.Gauge
	// totalScrapes is incremented at the start of every Collect call.
	totalScrapes prometheus.Counter
	// jsonParseFailures is incremented when the response is not valid JSON.
	jsonParseFailures prometheus.Counter

	// metric describes the status samples emitted by Collect.
	metric ilmStatusMetric
}

// NewIlmStatus returns an IlmStatusCollector that scrapes the ILM status
// through client from the Elasticsearch instance at url and logs to logger.
// All metrics it creates live under the "ilm" subsystem.
func NewIlmStatus(logger log.Logger, client *http.Client, url *url.URL) *IlmStatusCollector {
	subsystem := "ilm"

	return &IlmStatusCollector{
		logger: logger,
		client: client,
		url:    url,

		up: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: prometheus.BuildFQName(namespace, subsystem, "up"),
			Help: "Was the last scrape of the ElasticSearch Indices Ilms endpoint successful.",
		}),
		totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
			Name: prometheus.BuildFQName(namespace, subsystem, "scrapes_total"),
			Help: "Current total ElasticSearch Indices Ilms scrapes.",
		}),
		jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
			Name: prometheus.BuildFQName(namespace, subsystem, "json_parse_failures_total"),
			Help: "Number of errors while parsing JSON.",
		}),
		metric: ilmStatusMetric{
			Type: prometheus.GaugeValue,
			Desc: prometheus.NewDesc(
				prometheus.BuildFQName(namespace, subsystem, "status"),
				"Current status of ilm. Status can be STOPPED, RUNNING, STOPPING.",
				// Exactly one variable label: Collect supplies a single
				// label value (the status name) per sample. Declaring the
				// three status names as label NAMES makes
				// MustNewConstMetric panic on the cardinality mismatch.
				[]string{"operation_mode"}, nil,
			),
			// The sample is 1 for the status matching the reported
			// operation mode and 0 for every other status.
			Value: func(ilm *IlmStatusResponse, status string) float64 {
				if ilm.OperationMode == status {
					return 1
				}
				return 0
			},
		},
	}
}

// Describe sends the descriptors of every metric this collector can emit:
// the ILM status metric first, then the three self-metrics.
func (im *IlmStatusCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		im.metric.Desc,
		im.up.Desc(),
		im.totalScrapes.Desc(),
		im.jsonParseFailures.Desc(),
	}
	for _, d := range descs {
		ch <- d
	}
}

// getAndParseURL performs a GET request against u and unmarshals the JSON
// response body into an IlmStatusResponse.
//
// It returns an error when the request fails, when the status code is not
// 200, when the body cannot be read, or when it is not valid JSON; the last
// case also increments the jsonParseFailures counter. A failure to close the
// response body is only logged as a warning.
func (im *IlmStatusCollector) getAndParseURL(u *url.URL) (*IlmStatusResponse, error) {
	res, err := im.client.Get(u.String())
	if err != nil {
		// %w keeps the underlying transport error reachable via errors.Is/As.
		return nil, fmt.Errorf("failed to get from %s://%s:%s%s: %w",
			u.Scheme, u.Hostname(), u.Port(), u.Path, err)
	}

	// Close the body on every return path; without this the non-200 and
	// read-error branches below leak the response body.
	defer func() {
		if closeErr := res.Body.Close(); closeErr != nil {
			_ = level.Warn(im.logger).Log("msg", "failed to close response body", "err", closeErr)
		}
	}()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
	}

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		_ = level.Warn(im.logger).Log("msg", "failed to read response body", "err", err)
		return nil, err
	}

	var imr IlmStatusResponse
	if err := json.Unmarshal(body, &imr); err != nil {
		im.jsonParseFailures.Inc()
		return nil, err
	}

	return &imr, nil
}

// fetchAndDecodeIlm requests "/_ilm/status" relative to the collector's base
// URL and returns the decoded response.
func (im *IlmStatusCollector) fetchAndDecodeIlm() (*IlmStatusResponse, error) {
	// Copy the base URL so appending the endpoint path does not modify it.
	endpoint := *im.url
	endpoint.Path = path.Join(endpoint.Path, "/_ilm/status")

	return im.getAndParseURL(&endpoint)
}

// Collect fetches the ILM status from Elasticsearch and emits one sample per
// entry of ilm_statuses, using the status name as the label value and the
// metric's Value function for the sample value.
//
// The scrape counter is incremented up front, and the up, totalScrapes and
// jsonParseFailures self-metrics are always sent last.
func (im *IlmStatusCollector) Collect(ch chan<- prometheus.Metric) {
	im.totalScrapes.Inc()
	defer func() {
		selfMetrics := []prometheus.Metric{im.up, im.totalScrapes, im.jsonParseFailures}
		for _, m := range selfMetrics {
			ch <- m
		}
	}()

	statusResp, err := im.fetchAndDecodeIlm()
	if err != nil {
		im.up.Set(0)
		_ = level.Warn(im.logger).Log(
			"msg", "failed to fetch and decode cluster ilm status",
			"err", err,
		)
		return
	}
	im.up.Set(1)

	for idx := range ilm_statuses {
		mode := ilm_statuses[idx]
		sample := im.metric.Value(statusResp, mode)
		ch <- prometheus.MustNewConstMetric(im.metric.Desc, im.metric.Type, sample, mode)
	}
}
19 changes: 19 additions & 0 deletions collector/ilm_status_response.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Copyright 2021 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collector

// IlmStatusResponse is the decoded JSON document of an ILM status request.
type IlmStatusResponse struct {
	// OperationMode is decoded from the "operation_mode" field.
	OperationMode string `json:"operation_mode"`
}
Loading

0 comments on commit bb4b91c

Please sign in to comment.