Skip to content

Commit

Permalink
Add metrics for virtual-kubelet
Browse files Browse the repository at this point in the history
Co-authored-by: Francesco Cheinasso <cheinasso.francesco@gmail.com>
  • Loading branch information
2 people authored and adamjensenbot committed Apr 14, 2023
1 parent 1a9ff8e commit 4efc2ed
Show file tree
Hide file tree
Showing 13 changed files with 189 additions and 0 deletions.
1 change: 1 addition & 0 deletions cmd/virtual-kubelet/root/flag.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func InstallFlags(flags *pflag.FlagSet, o *Opts) {
flags.BoolVar(&o.EnableStorage, "enable-storage", false, "Enable the Liqo storage reflection")
flags.StringVar(&o.VirtualStorageClassName, "virtual-storage-class-name", "liqo", "Name of the virtual storage class")
flags.StringVar(&o.RemoteRealStorageClassName, "remote-real-storage-class-name", "", "Name of the real storage class to use for the actual volumes")
flags.BoolVar(&o.EnableMetrics, "enable-metrics", false, "Enable the metrics server")
flags.StringVar(&o.HomeAPIServerHost, "home-api-server-host", "",
"Home cluster API server HOST, this parameter is optional and required only to override the default values")
flags.StringVar(&o.HomeAPIServerPort, "home-api-server-port", "",
Expand Down
1 change: 1 addition & 0 deletions cmd/virtual-kubelet/root/opts.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ type Opts struct {
EnableStorage bool
VirtualStorageClassName string
RemoteRealStorageClassName string
EnableMetrics bool

HomeAPIServerHost string
HomeAPIServerPort string
Expand Down
6 changes: 6 additions & 0 deletions cmd/virtual-kubelet/root/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/liqotech/liqo/pkg/utils"
"github.com/liqotech/liqo/pkg/utils/restcfg"
nodeprovider "github.com/liqotech/liqo/pkg/virtualKubelet/liqoNodeProvider"
metrics "github.com/liqotech/liqo/pkg/virtualKubelet/metrics"
podprovider "github.com/liqotech/liqo/pkg/virtualKubelet/provider"
)

Expand Down Expand Up @@ -110,6 +111,7 @@ func runRootCommand(ctx context.Context, c *Opts) error {
EnableStorage: c.EnableStorage,
VirtualStorageClassName: c.VirtualStorageClassName,
RemoteRealStorageClassName: c.RemoteRealStorageClassName,
EnableMetrics: c.EnableMetrics,

HomeAPIServerHost: c.HomeAPIServerHost,
HomeAPIServerPort: c.HomeAPIServerPort,
Expand Down Expand Up @@ -194,6 +196,10 @@ func runRootCommand(ctx context.Context, c *Opts) error {
return errors.Wrap(err, "error while setting up HTTPS server")
}

if c.EnableMetrics {
metrics.SetupMetricHandler()
}

go func() {
if err := nodeRunner.Run(ctx); err != nil {
klog.Error(err, "error in pod controller running")
Expand Down
5 changes: 5 additions & 0 deletions deployments/liqo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,11 @@
| virtualKubelet.extra.labels | object | `{}` | virtual kubelet pod extra labels |
| virtualKubelet.extra.resources | object | `{"limits":{},"requests":{}}` | virtual kubelet pod containers' resource requests and limits (https://kubernetes.io/docs/user-guide/compute-resources/) |
| virtualKubelet.imageName | string | `"ghcr.io/liqotech/virtual-kubelet"` | virtual kubelet image repository |
| virtualKubelet.metrics.enabled | bool | `false` | expose metrics about virtual kubelet resources. |
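| virtualKubelet.metrics.podMonitor.enabled | bool | `false` | create a prometheus podmonitor. |
| virtualKubelet.metrics.podMonitor.interval | string | `""` | setup pod monitor requests interval. If empty, Prometheus uses the global scrape interval. ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#endpoint |
| virtualKubelet.metrics.podMonitor.scrapeTimeout | string | `""` | setup pod monitor scrape timeout. If empty, Prometheus uses the global scrape timeout. ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#endpoint |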
| virtualKubelet.metrics.port | int | `9090` | port used to expose metrics. |
| virtualKubelet.virtualNode.extra.annotations | object | `{}` | virtual node extra annotations |
| virtualKubelet.virtualNode.extra.labels | object | `{}` | virtual node extra labels |
| webhook.failurePolicy | string | `"Fail"` | the webhook failure policy, among Ignore and Fail |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
{{- $vkargs = append $vkargs "--certificate-type=aws" }}
{{- end }}
{{- end }}
{{- if not (or (has "--enable-metrics" $vkargs ) (has "--enable-metrics=true" $vkargs ) (has "--enable-metrics=false" $vkargs )) }}
{{- $vkargs = append $vkargs "--enable-metrics=true" }}
{{- end}}

apiVersion: apps/v1
kind: Deployment
Expand Down
22 changes: 22 additions & 0 deletions deployments/liqo/templates/liqo-virtualkubelet-podmonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{{- $kubeletMetricsConfig := (merge (dict "name" "virtual-kubelet" "module" "virtual-kubelet") .) -}}
{{- if .Values.virtualKubelet.metrics.podMonitor.enabled }}

apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: {{ include "liqo.prefixedName" $kubeletMetricsConfig }}
labels:
{{- include "liqo.labels" $kubeletMetricsConfig | nindent 4 }}
spec:
namespaceSelector:
any: true
selector:
matchLabels:
app.kubernetes.io/name: "virtual-kubelet"
app.kubernetes.io/component: "virtual-kubelet"
podMetricsEndpoints:
- port: metrics
interval: {{ .Values.virtualKubelet.metrics.podMonitor.interval }}
scrapeTimeout: {{ .Values.virtualKubelet.metrics.podMonitor.scrapeTimeout }}
{{- end }}

14 changes: 14 additions & 0 deletions deployments/liqo/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,20 @@ virtualKubelet:
annotations: {}
# -- virtual node extra labels
labels: {}
metrics:
# -- expose metrics about virtual kubelet resources.
enabled: false
# -- port used to expose metrics.
port: 9090
podMonitor:
# -- create a prometheus podmonitor.
enabled: false
# -- setup pod monitor requests interval. If empty, Prometheus uses the global scrape interval.
# ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#endpoint
interval: ""
# -- setup pod monitor scrape timeout. If empty, Prometheus uses the global scrape timeout.
# ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#endpoint
scrapeTimeout: ""

uninstaller:
pod:
Expand Down
9 changes: 9 additions & 0 deletions pkg/liqoctl/install/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,15 @@ func (o *Options) values() map[string]interface{} {
},
},

"virtualKubelet": map[string]interface{}{
"metrics": map[string]interface{}{
"enabled": o.EnableMetrics,
"podMonitor": map[string]interface{}{
"enabled": o.EnableMetrics,
},
},
},

"telemetry": map[string]interface{}{
"enable": !o.DisableTelemetry,
},
Expand Down
16 changes: 16 additions & 0 deletions pkg/virtualKubelet/metrics/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright 2019-2023 The Liqo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package metrics provides a set of metrics for the Virtual Kubelet component.
package metrics
84 changes: 84 additions & 0 deletions pkg/virtualKubelet/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright 2019-2023 The Liqo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metrics

import (
"net/http"
"os"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"k8s.io/klog/v2"
)

// MetricsPort is the address of the metrics endpoint, expressed in the ":port"
// form accepted by net/http servers.
const MetricsPort = ":9090"

var (
// ErrorsCounter is the counter of the errors occurred during the reflection.
// It is created by init with the "namespace" and "reflector_resource" labels,
// and registered to the prometheus registry by SetupMetricHandler.
ErrorsCounter *prometheus.CounterVec
// ItemsCounter is the counter of the reflected resources.
// A fast increase of this metric can indicate a race condition between local and remote operators.
// It is created by init with the "namespace" and "reflector_resource" labels,
// and registered to the prometheus registry by SetupMetricHandler.
ItemsCounter *prometheus.CounterVec
)

// init creates the counter vectors exposed by the virtual kubelet. The vectors are
// only instantiated here (registration is performed by SetupMetricHandler); if no
// error occurs or no item is processed, the corresponding metric is not exported.
func init() {
	// Labels shared by both counters.
	labelNames := []string{"namespace", "reflector_resource"}

	errorsOpts := prometheus.CounterOpts{
		Name: "liqo_virtual_kubelet_reflection_error_counter",
		Help: "The counter of the transient errors.",
	}
	itemsOpts := prometheus.CounterOpts{
		Name: "liqo_virtual_kubelet_reflection_item_counter",
		Help: "The counter of the reflected resources. A fast increase of this metric can indicate a race condition between local and remote operators.",
	}

	ErrorsCounter = prometheus.NewCounterVec(errorsOpts, labelNames)
	ItemsCounter = prometheus.NewCounterVec(itemsOpts, labelNames)
}

// SetupMetricHandler registers the reflection counters to the default prometheus
// registry, exposes them under "/metrics" on the default HTTP mux, and starts in a
// background goroutine a plain HTTP server listening on MetricsPort.
//
// The function returns immediately. If the server terminates with an error, the
// failure is logged and the process exits with status 1. Since both MustRegister
// and http.Handle panic on duplicate registrations, it must be called at most once.
func SetupMetricHandler() {
	// Register the metrics to the prometheus registry.
	prometheus.MustRegister(ErrorsCounter, ItemsCounter)

	http.Handle("/metrics", promhttp.Handler())

	go func() {
		klog.Infof("Starting the virtual kubelet Metric Handler listening on %q", MetricsPort)

		// Bind to MetricsPort, i.e. the same address that is logged above, so that the
		// server and the advertised endpoint cannot drift apart. No handler is set,
		// hence the server falls back to the default mux, where "/metrics" has just
		// been registered.
		server := &http.Server{
			Addr:              MetricsPort,
			ReadHeaderTimeout: 10 * time.Second,
		}

		// Metrics are served over plain HTTP: no key/certificate is required.
		if err := server.ListenAndServe(); err != nil {
			klog.Errorf("Failed to start the Metric Handler: %v", err)
			os.Exit(1)
		}
	}()
}
1 change: 1 addition & 0 deletions pkg/virtualKubelet/provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ type InitConfig struct {
EnableStorage bool
VirtualStorageClassName string
RemoteRealStorageClassName string
EnableMetrics bool

HomeAPIServerHost string
HomeAPIServerPort string
Expand Down
20 changes: 20 additions & 0 deletions pkg/virtualKubelet/reflection/generic/reflector.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"sync"
"time"

"github.com/prometheus/client_golang/prometheus"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
Expand All @@ -33,6 +34,7 @@ import (
"k8s.io/utils/trace"

traceutils "github.com/liqotech/liqo/pkg/utils/trace"
"github.com/liqotech/liqo/pkg/virtualKubelet/metrics"
"github.com/liqotech/liqo/pkg/virtualKubelet/reflection/manager"
"github.com/liqotech/liqo/pkg/virtualKubelet/reflection/options"
)
Expand Down Expand Up @@ -199,9 +201,27 @@ func (gr *reflector) processNextWorkItem() bool {

// Put the item back on the workqueue to handle any transient errors.
gr.workqueue.AddRateLimited(key)

// Increase the error counter metric.
metrics.ErrorsCounter.With(prometheus.Labels{"namespace": key.(types.NamespacedName).Namespace,
"reflector_resource": gr.name}).Inc()

if errors.As(err, &eae) {
// Put the item back on the workqueue after the given duration elapsed.
gr.workqueue.AddAfter(key, eae.duration)
return true
}

// Put the item back on the workqueue to handle any transient errors.
gr.workqueue.AddRateLimited(key)

return true
}

// Increase the item counter metric.
metrics.ItemsCounter.With(prometheus.Labels{"namespace": key.(types.NamespacedName).Namespace,
"reflector_resource": gr.name}).Inc()

// Finally, if no error occurs we Forget this item so it does not
// get queued again until another change happens.
gr.workqueue.Forget(key)
Expand Down
7 changes: 7 additions & 0 deletions pkg/vkMachinery/forge/forge.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ func forgeVKContainers(
ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "status.podIP"}},
},
},
Ports: []v1.ContainerPort{
{
Name: "metrics",
ContainerPort: 9090,
Protocol: v1.ProtocolTCP,
},
},
},
}
}
Expand Down

0 comments on commit 4efc2ed

Please sign in to comment.