From f0fc36034a000e9bfec5ebc3091e0eae31117bf6 Mon Sep 17 00:00:00 2001 From: heydbut Date: Wed, 16 Aug 2023 16:46:21 -0400 Subject: [PATCH 1/2] feat: add nats collector compose example --- collector/nats/examples/compose/README.md | 36 +++++++++++++ .../nats/examples/compose/collector.yaml | 20 +++++++ .../nats/examples/compose/docker-compose.yaml | 53 +++++++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 collector/nats/examples/compose/README.md create mode 100644 collector/nats/examples/compose/collector.yaml create mode 100644 collector/nats/examples/compose/docker-compose.yaml diff --git a/collector/nats/examples/compose/README.md b/collector/nats/examples/compose/README.md new file mode 100644 index 0000000..3cf3931 --- /dev/null +++ b/collector/nats/examples/compose/README.md @@ -0,0 +1,36 @@ +# Monitor NATS with the OpenTelemetry Collector for Lightstep + +## Overview + +NATS is a simple, secure, and high-performance open source messaging system. To maintain the reliability and performance of a NATS setup, it's crucial to have real-time monitoring. With the OpenTelemetry Collector, metrics from NATS can be effectively channeled to Lightstep for comprehensive analysis and visualization. This README will guide you through integrating NATS metrics with Lightstep using the OpenTelemetry Collector. + +## Prerequisites + +* Docker +* Docker Compose +* A Lightstep Observability account +* Lightstep Observability [access token][ls-docs-access-token] + +## How to set it up + +1. **Export your Lightstep access token**: + ```bash + export LS_ACCESS_TOKEN=<your-access-token> + ``` +2. **Run the docker compose example to spin up NATS and the OpenTelemetry Collector**: + ```bash + docker-compose up -d + ``` +3. **Access the NATS dashboard**: Depending on your setup, typically you can visit a web UI to manage and observe NATS. In our case, you can view the NATS node 1 dashboard at `http://localhost:8222/`. +4.
**Monitor NATS Metrics in Lightstep**: After setting things up, NATS metrics should start populating in your Lightstep dashboard. +5. **Shutting down the monitoring setup**: + ```bash + docker-compose down + ``` + +## Configuring NATS for Advanced Monitoring + +This guide assumes a standard NATS setup. However, NATS offers a variety of configuration options that can impact the metrics it provides. For in-depth configurations and best practices, always refer to [the official NATS documentation][nats-docs]. + +[ls-docs-access-token]: https://docs.lightstep.com/docs/create-and-manage-access-tokens +[nats-docs]: https://docs.nats.io/running-a-nats-service/nats_admin/monitoring diff --git a/collector/nats/examples/compose/collector.yaml b/collector/nats/examples/compose/collector.yaml new file mode 100644 index 0000000..e004473 --- /dev/null +++ b/collector/nats/examples/compose/collector.yaml @@ -0,0 +1,20 @@ +receivers: + prometheus/nats: + config: + scrape_configs: + - job_name: 'nats' + scrape_interval: 10s + static_configs: + - targets: ["nats-exporter-1:7777", "nats-exporter-2:7777", "nats-exporter-3:7777"] +exporters: + logging: + loglevel: debug + otlp: + endpoint: ingest.lightstep.com:443 + headers: + - lightstep-access-token: ${LS_ACCESS_TOKEN} +service: + pipelines: + metrics: + receivers: [prometheus/nats] + exporters: [logging, otlp] diff --git a/collector/nats/examples/compose/docker-compose.yaml b/collector/nats/examples/compose/docker-compose.yaml new file mode 100644 index 0000000..4d02d7a --- /dev/null +++ b/collector/nats/examples/compose/docker-compose.yaml @@ -0,0 +1,53 @@ +version: "3.9" + +services: + otel-collector: + image: otel/opentelemetry-collector-contrib:${OTEL_VERSION:-0.81.0} + hostname: otel-collector + restart: always + command: [ "--config=/conf/collector.yaml" ] + volumes: + - ./collector.yaml:/conf/collector.yaml:rw + environment: + LS_ACCESS_TOKEN: ${LS_ACCESS_TOKEN} + + nats-node-1: + image: nats:2.9.12-alpine3.17 + command: 
"--cluster_name spooky --cluster nats://0.0.0.0:6222 --http_port 8222 -js -sd /data -n nats-node-1 --routes=nats://nats-node-2:6222,nats://nats-node-3:6222 -p 4222" + ports: + - "4222:4222" # for local testing using nats cli + - "8222:8222" # access to nats monitoring dashboard + volumes: + - nats-cluster_data-1:/data + + nats-node-2: + image: nats:2.9.12-alpine3.17 + command: "--cluster_name spooky --cluster nats://0.0.0.0:6222 --http_port 8222 -js -sd /data -n nats-node-2 --routes=nats://nats-node-1:6222,nats://nats-node-3:6222 -p 4222" + volumes: + - nats-cluster_data-2:/data + + nats-node-3: + image: nats:2.9.12-alpine3.17 + command: "--cluster_name spooky --cluster nats://0.0.0.0:6222 --http_port 8222 -js -sd /data -n nats-node-3 --routes=nats://nats-node-1:6222,nats://nats-node-2:6222 -p 4222" + volumes: + - nats-cluster_data-3:/data + + nats-exporter-1: + image: natsio/prometheus-nats-exporter:0.11.0 + command: "-varz -jsz=all http://nats-node-1:8222" + + nats-exporter-2: + image: natsio/prometheus-nats-exporter:0.11.0 + command: "-varz -jsz=all http://nats-node-2:8222" + + nats-exporter-3: + image: natsio/prometheus-nats-exporter:0.11.0 + command: "-varz -jsz=all http://nats-node-3:8222" + +volumes: + nats-cluster_data-1: + driver: local + nats-cluster_data-2: + driver: local + nats-cluster_data-3: + driver: local From c2aa289e1f2c7dcb74f03d46006c266aecd7cf2d Mon Sep 17 00:00:00 2001 From: heydbut Date: Wed, 23 Aug 2023 15:34:39 -0400 Subject: [PATCH 2/2] Add metrics.csv --- collector/nats/examples/compose/metrics.csv | 89 +++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 collector/nats/examples/compose/metrics.csv diff --git a/collector/nats/examples/compose/metrics.csv b/collector/nats/examples/compose/metrics.csv new file mode 100644 index 0000000..4389328 --- /dev/null +++ b/collector/nats/examples/compose/metrics.csv @@ -0,0 +1,89 @@ +Name,Description,Unit,DataType,Attributes +gnatsd_varz_auth_timeout,auth_timeout,,gauge, 
+gnatsd_varz_cluster_name,cluster_name,,gauge, +gnatsd_varz_connections,connections,,gauge, +gnatsd_varz_cores,cores,,gauge, +gnatsd_varz_cpu,cpu,,gauge, +gnatsd_varz_gomaxprocs,gomaxprocs,,gauge, +gnatsd_varz_http_port,http_port,,gauge, +gnatsd_varz_http_req_stats_varz,http_req_stats_varz,,gauge, +gnatsd_varz_https_port,https_port,,gauge, +gnatsd_varz_in_bytes,in_bytes,,gauge, +gnatsd_varz_in_msgs,in_msgs,,gauge, +gnatsd_varz_jetstream_config_max_memory,jetstream_config_max_memory,,gauge, +gnatsd_varz_jetstream_config_max_storage,jetstream_config_max_storage,,gauge, +gnatsd_varz_jetstream_meta_cluster_size,jetstream_meta_cluster_size,,gauge, +gnatsd_varz_jetstream_meta_name,jetstream_meta_name,,gauge, +gnatsd_varz_jetstream_stats_accounts,jetstream_stats_accounts,,gauge, +gnatsd_varz_jetstream_stats_api_errors,jetstream_stats_api_errors,,gauge, +gnatsd_varz_jetstream_stats_api_total,jetstream_stats_api_total,,gauge, +gnatsd_varz_jetstream_stats_ha_assets,jetstream_stats_ha_assets,,gauge, +gnatsd_varz_jetstream_stats_memory,jetstream_stats_memory,,gauge, +gnatsd_varz_jetstream_stats_reserved_memory,jetstream_stats_reserved_memory,,gauge, +gnatsd_varz_jetstream_stats_reserved_storage,jetstream_stats_reserved_storage,,gauge, +gnatsd_varz_jetstream_stats_storage,jetstream_stats_storage,,gauge, +gnatsd_varz_leafnodes,leafnodes,,gauge, +gnatsd_varz_max_connections,max_connections,,gauge, +gnatsd_varz_max_control_line,max_control_line,,gauge, +gnatsd_varz_max_payload,max_payload,,gauge, +gnatsd_varz_max_pending,max_pending,,gauge, +gnatsd_varz_mem,mem,,gauge, +gnatsd_varz_out_bytes,out_bytes,,gauge, +gnatsd_varz_out_msgs,out_msgs,,gauge, +gnatsd_varz_ping_interval,ping_interval,,gauge, +gnatsd_varz_ping_max,ping_max,,gauge, +gnatsd_varz_port,port,,gauge, +gnatsd_varz_proto,proto,,gauge, +gnatsd_varz_remotes,remotes,,gauge, +gnatsd_varz_routes,routes,,gauge, +gnatsd_varz_server_id,server_id,,gauge, +gnatsd_varz_server_name,server_name,,gauge, 
+gnatsd_varz_slow_consumers,slow_consumers,,gauge, +gnatsd_varz_subscriptions,subscriptions,,gauge, +gnatsd_varz_tls_timeout,tls_timeout,,gauge, +gnatsd_varz_total_connections,total_connections,,gauge, +gnatsd_varz_version,version,,gauge, +gnatsd_varz_write_deadline,write_deadline,,gauge, +go_gc_duration_seconds,A summary of the pause duration of garbage collection cycles.,seconds,summary, +go_goroutines,Number of goroutines that currently exist.,,gauge, +go_info,Information about the Go environment.,,gauge, +go_memstats_alloc_bytes,Number of bytes allocated and still in use.,bytes,gauge, +go_memstats_alloc_bytes_total,Total number of bytes allocated even if freed.,bytes,counter, +go_memstats_buck_hash_sys_bytes,Number of bytes used by the profiling bucket hash table.,bytes,gauge, +go_memstats_frees_total,Total number of frees.,,counter, +go_memstats_gc_sys_bytes,Number of bytes used for garbage collection system metadata.,bytes,gauge, +go_memstats_heap_alloc_bytes,Number of heap bytes allocated and still in use.,bytes,gauge, +go_memstats_heap_idle_bytes,Number of heap bytes waiting to be used.,bytes,gauge, +go_memstats_heap_inuse_bytes,Number of heap bytes that are in use.,bytes,gauge, +go_memstats_heap_objects,Number of allocated objects.,,gauge, +go_memstats_heap_released_bytes,Number of heap bytes released to OS.,bytes,gauge, +go_memstats_heap_sys_bytes,Number of heap bytes obtained from system.,bytes,gauge, +go_memstats_last_gc_time_seconds,Number of seconds since 1970 of last garbage collection.,seconds,gauge, +go_memstats_lookups_total,Total number of pointer lookups.,,counter, +go_memstats_mallocs_total,Total number of mallocs.,,counter, +go_memstats_mcache_inuse_bytes,Number of bytes in use by mcache structures.,bytes,gauge, +go_memstats_mcache_sys_bytes,Number of bytes used for mcache structures obtained from system.,bytes,gauge, +go_memstats_mspan_inuse_bytes,Number of bytes in use by mspan structures.,bytes,gauge, +go_memstats_mspan_sys_bytes,Number of
bytes used for mspan structures obtained from system.,bytes,gauge, +go_memstats_next_gc_bytes,Number of heap bytes when next garbage collection will take place.,bytes,gauge, +go_memstats_other_sys_bytes,Number of bytes used for other system allocations.,bytes,gauge, +go_memstats_stack_inuse_bytes,Number of bytes in use by the stack allocator.,bytes,gauge, +go_memstats_stack_sys_bytes,Number of bytes obtained from system for stack allocator.,bytes,gauge, +go_memstats_sys_bytes,Number of bytes obtained from system.,bytes,gauge, +go_threads,Number of OS threads created.,,gauge, +jetstream_server_jetstream_disabled,JetStream disabled or not,,gauge, +jetstream_server_max_memory,JetStream Max Memory,,gauge, +jetstream_server_max_storage,JetStream Max Storage,,gauge, +jetstream_server_total_consumers,Total number of consumers in JetStream,,gauge, +jetstream_server_total_message_bytes,Total number of bytes stored in JetStream,,gauge, +jetstream_server_total_messages,Total number of stored messages in JetStream,,gauge, +jetstream_server_total_streams,Total number of streams in JetStream,,gauge, +process_cpu_seconds_total,Total user and system CPU time spent in seconds.,seconds,counter, +process_max_fds,Maximum number of open file descriptors.,,gauge, +process_open_fds,Number of open file descriptors.,,gauge, +process_resident_memory_bytes,Resident memory size in bytes.,bytes,gauge, +process_start_time_seconds,Start time of the process since Unix epoch in seconds.,seconds,gauge, +process_virtual_memory_bytes,Virtual memory size in bytes.,bytes,gauge, +process_virtual_memory_max_bytes,Maximum amount of virtual memory available in bytes.,bytes,gauge, +promhttp_metric_handler_requests_in_flight,Current number of scrapes being served.,,gauge, +promhttp_metric_handler_requests_total,Total number of scrapes by HTTP status code.,,counter,