Skip to content

Commit

Permalink
feat: prometheus metrics
Browse files Browse the repository at this point in the history
Co-authored-by: Masaru Hoshi <masaru.hoshi@qlik.com>
Co-authored-by: Tim Luimes <tluimes@gmail.com>
  • Loading branch information
3 people committed Feb 8, 2022
1 parent 6845fe7 commit 77ab33b
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 12 deletions.
3 changes: 2 additions & 1 deletion README.md
Expand Up @@ -28,9 +28,10 @@ It is built to be extendable and currently aims to support the following signing

It provides several additional features:

- [Metrics](docs/features/metrics.md): *get Prometheus metrics at `/metrics`*
- [Alerting](docs/features/alerting.md): *send alerts based on verification result*
- [Detection Mode](docs/features/detection_mode.md): *warn but do not block invalid images*
- [Namespaced Validation](docs/features/namespaced_validation.md): *restrict validation to dedicated namespaces*
- [Alerting](docs/features/alerting.md): *send alerts based on verification result*
- [Automatic Child Approval](docs/features/automatic_child_approval.md): *configure approval of Kubernetes child resources*


Expand Down
30 changes: 30 additions & 0 deletions connaisseur/flask_application.py
Expand Up @@ -4,6 +4,7 @@
import traceback

from flask import Flask, jsonify, request
from prometheus_flask_exporter import PrometheusMetrics, NO_PREFIX

from connaisseur.admission_request import AdmissionRequest
from connaisseur.alert import send_alerts
Expand All @@ -24,6 +25,15 @@
CONFIG = Config()
DETECTION_MODE = os.environ.get("DETECTION_MODE", "0") == "1"

# Module-level metrics exporter attached to the Flask APP; it is used below as
# a decorator source (`metrics.counter`, `metrics.do_not_track`).
# NOTE(review): `buckets` presumably are the upper bounds (in seconds) of the
# request duration histogram, and `NO_PREFIX` presumably leaves the default
# metric names unprefixed - confirm against the prometheus_flask_exporter docs.
metrics = PrometheusMetrics(
APP,
defaults_prefix=NO_PREFIX,
buckets=(0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 10.0, 15.0, 20, 30.0, float("inf")),
)
"""
Provides metrics for the Flask application
"""


@APP.errorhandler(AlertSendingError)
def handle_alert_sending_failure(err):
Expand All @@ -41,6 +51,14 @@ def handle_alert_config_error(err):


@APP.route("/mutate", methods=["POST"])
@metrics.counter(
"mutate_requests_total",
"Total number of mutate requests",
labels={
"allowed": lambda r: metrics_label(r, "allowed"),
"status_code": lambda r: metrics_label(r, "status_code"),
},
)
def mutate():
"""
Handle the '/mutate' path and accept CREATE and UPDATE requests.
Expand Down Expand Up @@ -74,8 +92,19 @@ def mutate():
return jsonify(response)


def metrics_label(response, label):
    """
    Extract the value for a metrics label from a response.

    The `response` must offer `get_json(silent=True)`. For the label
    "allowed" the value of `response.allowed` inside the JSON body is
    returned, for "status_code" the value of `response.status.code`.

    If the body is empty or cannot be parsed as JSON, the (falsy) parse result
    is returned unchanged. If the body is JSON but lacks the expected
    structure, `None` is returned instead of raising, so that label extraction
    never raises on an unexpected body. For any other `label` the parsed body
    itself is returned.
    """
    json_response = response.get_json(silent=True)
    if not json_response:
        return json_response

    try:
        if label == "allowed":
            return json_response["response"]["allowed"]
        if label == "status_code":
            return json_response["response"]["status"]["code"]
    except (KeyError, TypeError):
        # body is valid JSON but not shaped like an admission review
        return None

    return json_response


# health probe
@APP.route("/health", methods=["GET", "POST"])
@metrics.do_not_track()
def healthz():
"""
Handle the '/health' endpoint and check the health status of the web server.
Expand All @@ -87,6 +116,7 @@ def healthz():

# readiness probe
@APP.route("/ready", methods=["GET", "POST"])
@metrics.do_not_track()
def readyz():
    """
    Handle the '/ready' endpoint used as readiness probe. Always answers with
    an empty body and HTTP status 200.
    """
    return ("", 200)

Expand Down
3 changes: 2 additions & 1 deletion docs/README.md
Expand Up @@ -17,9 +17,10 @@ It is built to be extendable and currently aims to support the following signing

It provides several additional features:

- [Metrics](features/metrics.md): *get Prometheus metrics at `/metrics`*
- [Alerting](features/alerting.md): *send alerts based on verification result*
- [Detection Mode](features/detection_mode.md): *warn but do not block invalid images*
- [Namespaced Validation](features/namespaced_validation.md): *restrict validation to dedicated namespaces*
- [Alerting](features/alerting.md): *send alerts based on verification result*
- [Automatic Child Approval](features/automatic_child_approval.md): *configure approval of Kubernetes child resources*

Feel free to reach out via [GitHub Discussions](https://github.com/sse-secure-systems/connaisseur/discussions)!
Expand Down
80 changes: 80 additions & 0 deletions docs/features/metrics.md
@@ -0,0 +1,80 @@
# Metrics

Connaisseur exposes metrics about usage of the `/mutate` endpoint and general information about the Python process using [Prometheus Flask Exporter](https://pypi.org/project/prometheus-flask-exporter/) through the `/metrics` endpoint.

This allows, for example, visualizing the number of allowed or denied resource requests.

## Example

```
# HELP python_gc_objects_collected_total Objects collected during gc
# TYPE python_gc_objects_collected_total counter
python_gc_objects_collected_total{generation="0"} 4422.0
python_gc_objects_collected_total{generation="1"} 1866.0
python_gc_objects_collected_total{generation="2"} 0.0
# HELP python_gc_objects_uncollectable_total Uncollectable object found during GC
# TYPE python_gc_objects_uncollectable_total counter
python_gc_objects_uncollectable_total{generation="0"} 0.0
python_gc_objects_uncollectable_total{generation="1"} 0.0
python_gc_objects_uncollectable_total{generation="2"} 0.0
# HELP python_gc_collections_total Number of times this generation was collected
# TYPE python_gc_collections_total counter
python_gc_collections_total{generation="0"} 163.0
python_gc_collections_total{generation="1"} 14.0
python_gc_collections_total{generation="2"} 1.0
# HELP python_info Python platform information
# TYPE python_info gauge
python_info{implementation="CPython",major="3",minor="10",patchlevel="2",version="3.10.2"} 1.0
# HELP process_virtual_memory_bytes Virtual memory size in bytes.
# TYPE process_virtual_memory_bytes gauge
process_virtual_memory_bytes 6.1161472e+07
# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes 4.595712e+07
# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1.6436681112e+09
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 3.3
# HELP process_open_fds Number of open file descriptors.
# TYPE process_open_fds gauge
process_open_fds 12.0
# HELP process_max_fds Maximum number of open file descriptors.
# TYPE process_max_fds gauge
process_max_fds 1.048576e+06
# HELP exporter_info Information about the Prometheus Flask exporter
# TYPE exporter_info gauge
exporter_info{version="0.18.7"} 1.0
# HELP http_request_duration_seconds Flask HTTP request duration in seconds
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{le="0.1",method="POST",path="/mutate",status="200"} 5.0
http_request_duration_seconds_bucket{le="0.25",method="POST",path="/mutate",status="200"} 5.0
http_request_duration_seconds_bucket{le="0.5",method="POST",path="/mutate",status="200"} 5.0
http_request_duration_seconds_bucket{le="0.75",method="POST",path="/mutate",status="200"} 8.0
http_request_duration_seconds_bucket{le="1.0",method="POST",path="/mutate",status="200"} 8.0
http_request_duration_seconds_bucket{le="2.5",method="POST",path="/mutate",status="200"} 9.0
http_request_duration_seconds_bucket{le="+Inf",method="POST",path="/mutate",status="200"} 9.0
http_request_duration_seconds_count{method="POST",path="/mutate",status="200"} 9.0
http_request_duration_seconds_sum{method="POST",path="/mutate",status="200"} 3.6445974350208417
# HELP http_request_duration_seconds_created Flask HTTP request duration in seconds
# TYPE http_request_duration_seconds_created gauge
http_request_duration_seconds_created{method="POST",path="/mutate",status="200"} 1.643668194758098e+09
# HELP http_request_total Total number of HTTP requests
# TYPE http_request_total counter
http_request_total{method="POST",status="200"} 9.0
# HELP http_request_created Total number of HTTP requests
# TYPE http_request_created gauge
http_request_created{method="POST",status="200"} 1.6436681947581613e+09
# HELP http_request_exceptions_total Total number of HTTP requests which resulted in an exception
# TYPE http_request_exceptions_total counter
# HELP mutate_requests_total Total number of mutate requests
# TYPE mutate_requests_total counter
mutate_requests_total{allowed="False",status_code="403"} 4.0
mutate_requests_total{allowed="True",status_code="202"} 5.0
# HELP mutate_requests_created Total number of mutate requests
# TYPE mutate_requests_created gauge
mutate_requests_created{allowed="False",status_code="403"} 1.643760946491879e+09
mutate_requests_created{allowed="True",status_code="202"} 1.6437609592007663e+09
```

4 changes: 2 additions & 2 deletions helm/values.yaml
Expand Up @@ -147,9 +147,9 @@ namespacedValidation:
mode: ignore # 'ignore' or 'validate'

# automatic child approval determines how admission of Kubernetes child resources is handled by Connaisseur.
# per default, Connaisseur validates and mutates all resources, e.g. deployments, replicaSets, pods, and
# per default, Connaisseur validates and mutates all resources, e.g. deployments, replicaSets, pods, and
# automatically approves child resources of those to avoid duplicate validation and inconsistencies with the
# image policy. when disabled Connaisseur will only validate and mutate pods. check the docs for more
# image policy. when disabled Connaisseur will only validate and mutate pods. check the docs for more
# information.
# NOTE: configuration of automatic child approval is in EXPERIMENTAL state.
automaticChildApproval:
Expand Down
3 changes: 2 additions & 1 deletion mkdocs.yml
Expand Up @@ -92,9 +92,10 @@ nav:
- validators/notaryv2.md
- Features:
- features/README.md
- features/metrics.md
- features/alerting.md
- features/detection_mode.md
- features/namespaced_validation.md
- features/alerting.md
- features/automatic_child_approval.md
- Security:
- threat_model.md
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -5,6 +5,7 @@ Flask~=2.0.2
Jinja2~=3.0.3
jsonschema~=4.4.0
parsedatetime~=2.6
prometheus-flask-exporter~=0.18.7
python-dateutil~=2.8.2
pytz~=2021.3
PyYAML~=6.0
Expand Down
17 changes: 10 additions & 7 deletions tests/test_flask_application.py
Expand Up @@ -116,11 +116,13 @@ def test_mutate_calls_send_alert_for_invalid_admission_request(


def test_healthz():
assert pytest.fa.healthz() == ("", 200)
with pytest.fa.APP.test_request_context():
assert pytest.fa.healthz() == ("", 200)


def test_readyz():
assert pytest.fa.readyz() == ("", 200)
with pytest.fa.APP.test_request_context():
assert pytest.fa.readyz() == ("", 200)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -231,8 +233,9 @@ def test_error_handler(

mocker.patch("connaisseur.flask_application.__admit", return_value=True)
mock_function = mocker.patch(**function)
client = pytest.fa.APP.test_client()
mock_request_data = fix.get_admreq("deployments")
response = client.post("/mutate", json=mock_request_data)
assert response.status_code == 500
assert response.get_data().decode() == err
with pytest.fa.APP.test_request_context():
client = pytest.fa.APP.test_client()
mock_request_data = fix.get_admreq("deployments")
response = client.post("/mutate", json=mock_request_data)
assert response.status_code == 500
assert response.get_data().decode() == err

0 comments on commit 77ab33b

Please sign in to comment.