-
Notifications
You must be signed in to change notification settings - Fork 390
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add self-diagnostics example (#1846)
Example to demonstrate using tracing as a global error handler for errors generated by the OpenTelemetry Metrics SDK. In this example, measurements are recorded to exceed the cardinality limit, which triggers the error to be logged. This error is then emitted to `stdout` using `opentelemetry-appender-tracing` subscriber.
- Loading branch information
Showing
7 changed files
with
302 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
[package] | ||
name = "self-diagnostics" | ||
version = "0.1.0" | ||
edition = "2021" | ||
license = "Apache-2.0" | ||
publish = false | ||
|
||
[dependencies] | ||
opentelemetry = { path = "../../opentelemetry" } | ||
opentelemetry_sdk = { path = "../../opentelemetry-sdk", features = ["rt-tokio"]} | ||
opentelemetry-stdout = { path = "../../opentelemetry-stdout"} | ||
opentelemetry-appender-tracing = { path = "../../opentelemetry-appender-tracing"} | ||
tokio = { workspace = true, features = ["full"] } | ||
tracing = { workspace = true, features = ["std"]} | ||
tracing-core = { workspace = true } | ||
tracing-subscriber = { version = "0.3.18", features = ["env-filter","registry", "std"]} | ||
opentelemetry-otlp = { path = "../../opentelemetry-otlp", features = ["http-proto", "reqwest-client", "logs"] } | ||
once_cell ={ version = "1.19.0"} | ||
ctrlc = "3.4" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Builds and runs the `self-diagnostics` example binary.
# NOTE(review): the crate manifest declares `edition = "2021"`, which needs Rust 1.56
# or newer, and its path dependencies live outside this directory -- confirm the base
# image tag and the Docker build context before relying on this image.
FROM rust:1.51
COPY . /usr/src/self-diagnostics/
WORKDIR /usr/src/self-diagnostics/
RUN cargo build --release
RUN cargo install --path .
# `cargo install` names the binary after the package (`self-diagnostics`).
CMD ["/usr/local/cargo/bin/self-diagnostics"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Basic OpenTelemetry metrics example with custom error handler: | ||
|
||
This example shows how to set up a custom error handler for self-diagnostics. | ||
|
||
## Custom Error Handling: | ||
|
||
A custom error handler is set up to capture and record errors using the `tracing` crate's `error!` macro. These errors are then exported to a collector using the `opentelemetry-appender-tracing` crate, which utilizes the OTLP log exporter over `HTTP/protobuf`. As a result, any errors generated by the configured OTLP metrics pipeline are funneled through this custom error handler for proper recording and export. | ||
|
||
## Filtering logs from external dependencies of OTLP Exporter: | ||
|
||
The example configures a tracing `filter` to restrict logs from external crates (`hyper`, `tonic`, and `reqwest`) used by the OTLP Exporter to the `error` level. This helps prevent an infinite loop of log generation when these crates emit logs that are picked up by the tracing subscriber. | ||
|
||
## Ensure that the internally generated errors are logged only once: | ||
|
||
By using a hashset to track seen errors, the custom error handler ensures that the same error is not logged multiple times. This is particularly useful for handling scenarios where continuous error logging might occur, such as when the OpenTelemetry collector is not running. | ||
|
||
|
||
## Usage | ||
|
||
### `docker-compose` | ||
|
||
By default runs against the `otel/opentelemetry-collector:latest` image, and uses `reqwest-client` | ||
as the http client, using http as the transport. | ||
|
||
```shell | ||
docker-compose up | ||
``` | ||
|
||
In another terminal run the application `cargo run` | ||
|
||
The docker-compose terminal will display logs, traces, metrics. | ||
|
||
Press Ctrl+C to stop the collector, and then tear it down: | ||
|
||
```shell | ||
docker-compose down | ||
``` | ||
|
||
### Manual | ||
|
||
If you don't want to use `docker-compose`, you can manually run the `otel/opentelemetry-collector` container | ||
and inspect its output to see the metrics and logs being received. | ||
|
||
On Unix based systems use: | ||
|
||
```shell | ||
# From the current directory, run `opentelemetry-collector` | ||
docker run --rm -it -p 4318:4318 -v $(pwd):/cfg otel/opentelemetry-collector:latest --config=/cfg/otel-collector-config.yaml | ||
``` | ||
|
||
On Windows use: | ||
|
||
```shell | ||
# From the current directory, run `opentelemetry-collector` | ||
docker run --rm -it -p 4318:4318 -v "%cd%":/cfg otel/opentelemetry-collector:latest --config=/cfg/otel-collector-config.yaml | ||
``` | ||
|
||
Run the app, which exports metrics and logs via OTLP to the collector: | ||
|
||
```shell | ||
cargo run | ||
``` | ||
|
||
### Output: | ||
|
||
- If the collector's Docker instance is running, the error below should appear in the container's log output. There won't be any logs from the `hyper`, `reqwest` and `tonic` crates. | ||
``` | ||
otel-collector-1 | 2024-06-05T17:09:46.926Z info LogsExporter {"kind": "exporter", "data_type": "logs", "name": "logging", "resource logs": 1, "log records": 1} | ||
otel-collector-1 | 2024-06-05T17:09:46.926Z info ResourceLog #0 | ||
otel-collector-1 | Resource SchemaURL: | ||
otel-collector-1 | Resource attributes: | ||
otel-collector-1 | -> telemetry.sdk.name: Str(opentelemetry) | ||
otel-collector-1 | -> telemetry.sdk.version: Str(0.23.0) | ||
otel-collector-1 | -> telemetry.sdk.language: Str(rust) | ||
otel-collector-1 | -> service.name: Str(unknown_service) | ||
otel-collector-1 | ScopeLogs #0 | ||
otel-collector-1 | ScopeLogs SchemaURL: | ||
otel-collector-1 | InstrumentationScope opentelemetry-appender-tracing 0.4.0 | ||
otel-collector-1 | LogRecord #0 | ||
otel-collector-1 | ObservedTimestamp: 2024-06-05 17:09:45.931951161 +0000 UTC | ||
otel-collector-1 | Timestamp: 1970-01-01 00:00:00 +0000 UTC | ||
otel-collector-1 | SeverityText: ERROR | ||
otel-collector-1 | SeverityNumber: Error(17) | ||
otel-collector-1 | Body: Str(OpenTelemetry metrics error occurred: Metrics error: Warning: Maximum data points for metric stream exceeded. Entry added to overflow. Subsequent overflows to same metric until next collect will not be logged.) | ||
otel-collector-1 | Attributes: | ||
otel-collector-1 | -> name: Str(event examples/self-diagnostics/src/main.rs:42) | ||
otel-collector-1 | Trace ID: | ||
otel-collector-1 | Span ID: | ||
otel-collector-1 | Flags: 0 | ||
otel-collector-1 | {"kind": "exporter", "data_type": "logs", "name": "logging"} | ||
``` | ||
|
||
- The SDK will keep trying to upload metrics at regular intervals if the collector's Docker instance is down. To avoid a logging loop, each distinct internal error (such as 'Connection refused') is logged only once. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
version: "2" | ||
services: | ||
|
||
# Collector | ||
otel-collector: | ||
image: otel/opentelemetry-collector:latest | ||
command: ["--config=/etc/otel-collector-config.yaml", "${OTELCOL_ARGS}"] | ||
volumes: | ||
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml | ||
ports: | ||
- "4318:4318" # OTLP HTTP receiver |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# This is a configuration file for the OpenTelemetry Collector intended to be | ||
# used in conjunction with the self-diagnostics example. | ||
# | ||
# For more information about the OpenTelemetry Collector see: | ||
# https://github.com/open-telemetry/opentelemetry-collector | ||
# | ||
receivers: | ||
otlp: | ||
protocols: | ||
grpc: | ||
http: | ||
|
||
exporters: | ||
debug: | ||
verbosity: detailed | ||
|
||
service: | ||
pipelines: | ||
traces: | ||
receivers: [otlp] | ||
exporters: [debug] | ||
metrics: | ||
receivers: [otlp] | ||
exporters: [debug] | ||
logs: | ||
receivers: [otlp] | ||
exporters: [debug] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
use opentelemetry::global::{self, set_error_handler, Error as OtelError}; | ||
use opentelemetry::KeyValue; | ||
use opentelemetry_appender_tracing::layer; | ||
use opentelemetry_otlp::WithExportConfig; | ||
use tracing_subscriber::prelude::*; | ||
use tracing_subscriber::EnvFilter; | ||
|
||
use std::error::Error; | ||
use tracing::error; | ||
|
||
use once_cell::sync::Lazy; | ||
use std::collections::HashSet; | ||
use std::sync::{Arc, Mutex}; | ||
|
||
use ctrlc; | ||
use std::sync::mpsc::channel; | ||
|
||
struct ErrorState { | ||
seen_errors: Mutex<HashSet<String>>, | ||
} | ||
|
||
impl ErrorState { | ||
fn new() -> Self { | ||
ErrorState { | ||
seen_errors: Mutex::new(HashSet::new()), | ||
} | ||
} | ||
|
||
fn mark_as_seen(&self, err: &OtelError) -> bool { | ||
let mut seen_errors = self.seen_errors.lock().unwrap(); | ||
seen_errors.insert(err.to_string()) | ||
} | ||
} | ||
|
||
static GLOBAL_ERROR_STATE: Lazy<Arc<ErrorState>> = Lazy::new(|| Arc::new(ErrorState::new())); | ||
|
||
fn custom_error_handler(err: OtelError) { | ||
if GLOBAL_ERROR_STATE.mark_as_seen(&err) { | ||
// log error not already seen | ||
match err { | ||
OtelError::Metric(err) => error!("OpenTelemetry metrics error occurred: {}", err), | ||
OtelError::Trace(err) => error!("OpenTelemetry trace error occurred: {}", err), | ||
OtelError::Log(err) => error!("OpenTelemetry log error occurred: {}", err), | ||
OtelError::Propagation(err) => { | ||
error!("OpenTelemetry propagation error occurred: {}", err) | ||
} | ||
OtelError::Other(err_msg) => error!("OpenTelemetry error occurred: {}", err_msg), | ||
_ => error!("OpenTelemetry error occurred: {:?}", err), | ||
} | ||
} | ||
} | ||
|
||
fn init_logger_provider() -> opentelemetry_sdk::logs::LoggerProvider { | ||
let provider = opentelemetry_otlp::new_pipeline() | ||
.logging() | ||
.with_exporter( | ||
opentelemetry_otlp::new_exporter() | ||
.http() | ||
.with_endpoint("http://localhost:4318/v1/logs"), | ||
) | ||
.install_batch(opentelemetry_sdk::runtime::Tokio) | ||
.unwrap(); | ||
|
||
// Add a tracing filter to filter events from crates used by opentelemetry-otlp. | ||
// The filter levels are set as follows: | ||
// - Allow `info` level and above by default. | ||
// - Restrict `hyper`, `tonic`, and `reqwest` to `error` level logs only. | ||
// This ensures events generated from these crates within the OTLP Exporter are not looped back, | ||
// thus preventing infinite event generation. | ||
// Note: This will also drop events from these crates used outside the OTLP Exporter. | ||
// For more details, see: https://github.com/open-telemetry/opentelemetry-rust/issues/761 | ||
let filter = EnvFilter::new("info") | ||
.add_directive("hyper=error".parse().unwrap()) | ||
.add_directive("tonic=error".parse().unwrap()) | ||
.add_directive("reqwest=error".parse().unwrap()); | ||
let cloned_provider = provider.clone(); | ||
let layer = layer::OpenTelemetryTracingBridge::new(&cloned_provider); | ||
tracing_subscriber::registry() | ||
.with(filter) | ||
.with(layer) | ||
.init(); | ||
provider | ||
} | ||
|
||
fn init_meter_provider() -> opentelemetry_sdk::metrics::SdkMeterProvider { | ||
let provider = opentelemetry_otlp::new_pipeline() | ||
.metrics(opentelemetry_sdk::runtime::Tokio) | ||
.with_period(std::time::Duration::from_secs(1)) | ||
.with_exporter( | ||
opentelemetry_otlp::new_exporter() | ||
.http() | ||
.with_endpoint("http://localhost:4318/v1/metrics"), | ||
) | ||
.build() | ||
.unwrap(); | ||
let cloned_provider = provider.clone(); | ||
global::set_meter_provider(cloned_provider); | ||
provider | ||
} | ||
|
||
#[tokio::main] | ||
async fn main() -> Result<(), Box<dyn Error + Send + Sync + 'static>> { | ||
// Set the custom error handler | ||
if let Err(err) = set_error_handler(custom_error_handler) { | ||
eprintln!("Failed to set custom error handler: {}", err); | ||
} | ||
|
||
let logger_provider = init_logger_provider(); | ||
|
||
// Initialize the MeterProvider with the stdout Exporter. | ||
let meter_provider = init_meter_provider(); | ||
|
||
// Create a meter from the above MeterProvider. | ||
let meter = global::meter("example"); | ||
// Create a Counter Instrument. | ||
let counter = meter.u64_counter("my_counter").init(); | ||
|
||
// Record measurements with unique key-value pairs to exceed the cardinality limit | ||
// of 2000 and trigger error message | ||
for i in 0..3000 { | ||
counter.add( | ||
10, | ||
&[KeyValue::new( | ||
format!("mykey{}", i), | ||
format!("myvalue{}", i), | ||
)], | ||
); | ||
} | ||
|
||
let (tx, rx) = channel(); | ||
|
||
ctrlc::set_handler(move || tx.send(()).expect("Could not send signal on channel.")) | ||
.expect("Error setting Ctrl-C handler"); | ||
|
||
println!("Press Ctrl-C to continue..."); | ||
rx.recv().expect("Could not receive from channel."); | ||
println!("Got Ctrl-C, Doing shutdown and existing."); | ||
|
||
// MeterProvider is configured with an OTLP Exporter to export metrics every 1 second, | ||
// however shutting down the MeterProvider here instantly flushes | ||
// the metrics, instead of waiting for the 1 sec interval. | ||
meter_provider.shutdown()?; | ||
let _ = logger_provider.shutdown(); | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters