From 37d1738396a0f8b166d8475ac18a1bbf0628bc88 Mon Sep 17 00:00:00 2001
From: Pavol Loffay
Date: Tue, 31 Oct 2023 19:47:24 +0100
Subject: [PATCH] Add correlation
Signed-off-by: Pavol Loffay
---
07-correlation.md | 140 +++++++++++++++++++++++++++++++++++--
app/frontend/index.js | 2 +-
app/frontend/instrument.js | 8 +++
3 files changed, 145 insertions(+), 5 deletions(-)
diff --git a/07-correlation.md b/07-correlation.md
index 80fe1f0..d7d6c8e 100644
--- a/07-correlation.md
+++ b/07-correlation.md
@@ -1,14 +1,146 @@
# Correlation
+An SRE workflow usually starts with a triggered alert on a metric that crossed a threshold.
+From there, the investigation continues by looking at logs and traces. Therefore, it is important
+to be able to correlate these three signal types together.
+
+In general all signals can be correlated by time and resource (from where the data was reported).
+However, there are other correlation techniques as well e.g. trace exemplars.
+
## Collecting Kubernetes resource attributes
-* Use k8sresource attribute processor
-* Attach k8s labels as metric labels
+In the Kubernetes environment it is crucial to identify from where the telemetry data was reported.
+It is important to know exactly which container, pod or deployment created the data but as well on which
+node and cluster it was running.
+
+The [Kubernetes resource attributes](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/resource/k8s.md) are prefixed with `k8s`: `k8s.pod.name`, `k8s.pod.uid` etc.
+
+The Kubernetes resource attributes can be added to metrics in a couple of different ways:
+1. in OpenTelemetry SDK / `OTEL_RESOURCE_ATTRIBUTES` environment variable
+2. in collector [k8sattributesprocessor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/k8sattributesprocessor)
+
+### OpenTelemetry SDK / `OTEL_RESOURCE_ATTRIBUTES`
+
+The OpenTelemetry operator injects `OTEL_RESOURCE_ATTRIBUTES` with Kubernetes resource attributes into the application containers of pods that inject the OpenTelemetry collector as a sidecar.
+The operator uses Kubernetes downward API to get Kubernetes attributes.
+
+```bash
+sidecar.opentelemetry.io/inject: "true"
+```
+
+### k8s Attributes Processor
+
+This processor is the most sophisticated processor for collecting Kubernetes resource attributes.
+It also allows collecting pod, namespace and node labels and annotations.
+
+The k8sattributesprocessor queries the k8s API server to discover all running pods in a cluster.
+It keeps a record of their IP addresses, pod UIDs and interesting metadata.
+The rules for associating the data passing through the processor (spans, metrics and logs) with specific Pod Metadata are configured via `pod_association` key.
+By default, it associates the incoming connection IP to the Pod IP.
+
+The processor requires the following RBAC rules to query the API server:
+
+```yaml
+kubectl apply -f - <<EOF
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: otel-collector
+rules:
+- apiGroups: [""]
+ resources: ["pods", "namespaces"]
+ verbs: ["get", "watch", "list"]
+- apiGroups: ["apps"]
+ resources: ["replicasets"]
+ verbs: ["get", "list", "watch"]
+- apiGroups: ["extensions"]
+ resources: ["replicasets"]
+ verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: otel-collector
+subjects:
+- kind: ServiceAccount
+ name: collector
+ namespace: <namespace-where-the-collector-runs> # NOTE: must be set — a ServiceAccount subject requires a namespace
+roleRef:
+ kind: ClusterRole
+ name: otel-collector
+ apiGroup: rbac.authorization.k8s.io
+EOF
+```
+
+```yaml
+ processors:
+ k8sattributes:
+ passthrough: false # when true only pod IP addresses are added, that can be used later for attributes association
+ extract:
+ annotations:
+ - tag_name: a1 # extracts value of annotation from pods with key `annotation-one` and inserts it as a tag with key `a1`
+ key: annotation-one
+ from: pod
+```
+
+## Resource Detection Processor
+
+The [resourcedetectionprocessor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor) can
+be used to detect the resource information from the host. Several detectors are supported:
+
+* `env`: read attributes from `OTEL_RESOURCE_ATTRIBUTES`
+* `system`: `host.name`, `host.arch`, `host.id`, `host.cpu.model.name`, `host.cpu.vendor.id`
+* `docker`: `host.name`, `os.type`
+* `heroku`: `heroku.app.id`, `heroku.release.commit`, `service.name`
+* `gcp`: `cloud.provider` (`gcp`), `cloud.platform` (`gcp_app_engine`), `cloud.region` (`us-central1`), `cloud.availability_zone` (`us-central1-c`), `gcp.gce.instance.hostname`
+* `openshift`: `cloud.provider`, `cloud.platform`, `cloud.region`, `k8s.cluster.name`
+
+```yaml
+processors:
+ resourcedetection:
+ detectors: [env, system]
+ timeout: 2s
+ override: false
+```
## Exemplars
-* Are exemplars supported with OTLP?
-* Is there a way to generate exemplars in the collector?
+[Exemplars](https://opentelemetry.io/docs/specs/otel/metrics/sdk/#exemplar) allow correlation
+between aggregated metric data and the original API calls where measurements are recorded.
+Exemplars work for trace-metric correlation across any metric, not just those that can also be derived from Spans.
+
+
+Not all OpenTelemetry SDKs support exemplars:
+* https://github.com/open-telemetry/opentelemetry-go/issues/559
+* https://github.com/open-telemetry/opentelemetry-js/issues/2594
+* https://github.com/open-telemetry/opentelemetry-python/issues/2407
+
+### Spanmetrics Connector
+
+The [spanmetrics](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/spanmetricsconnector)
+connector aggregates Request, Error and Duration (R.E.D) OpenTelemetry metrics from span data.
+It supports exemplars.
+
+```yaml
+connectors:
+ spanmetrics:
+ exemplars:
+ enabled: true
+service:
+ pipelines:
+ traces:
+ receivers: [otlp]
+ exporters: [spanmetrics]
+ metrics:
+ receivers: [spanmetrics]
+ exporters: [otlp]
+```
## Trace-context baggage
diff --git a/app/frontend/index.js b/app/frontend/index.js
index 07b13c7..84f351e 100644
--- a/app/frontend/index.js
+++ b/app/frontend/index.js
@@ -6,7 +6,7 @@ const http = require("http");
const app = require("express")();
const pino = require('pino-http')()
-var otelsdkinit = require('./instrument.js');
+//var otelsdkinit = require('./instrument.js');
app.use(pino)
diff --git a/app/frontend/instrument.js b/app/frontend/instrument.js
index 1c10aaa..128f64a 100644
--- a/app/frontend/instrument.js
+++ b/app/frontend/instrument.js
@@ -14,7 +14,15 @@ const { OTLPMetricExporter } = require('@opentelemetry/exporter-metrics-otlp-grp
const { PeriodicExportingMetricReader, MeterProvider, ConsoleMetricExporter } = require('@opentelemetry/sdk-metrics')
+const { Resource } = require('@opentelemetry/resources');
+const { SemanticResourceAttributes } = require('@opentelemetry/semantic-conventions');
+
const sdk = new opentelemetry.NodeSDK({
+ resource: new Resource({
+// [SemanticResourceAttributes.SERVICE_NAME]: 'frontend',
+// [SemanticResourceAttributes.SERVICE_VERSION]: '0.1.0',
+ ["my-org-service-version"]: '2.0.1',
+ }),
traceExporter: new OTLPTraceExporter(),
metricReader: new PeriodicExportingMetricReader({
// exporter: new OTLPMetricExporter(),