diff --git a/azure/Makefile b/azure/Makefile
new file mode 100644
index 0000000..e43ed1d
--- /dev/null
+++ b/azure/Makefile
@@ -0,0 +1,192 @@
+# Unique random ID encoded in resource names. It is generated on the first
+# apply and used for all subsequent operations. The value below is only a
+# placeholder; after the first run, reset it to the random value that apply
+# reports, since certain management commands depend on it.
+PROJECT_ID = 4a9023
+
+# Collector image
+COLLECTOR_IMAGE = otel/opentelemetry-collector
+LOCAL_COLLECTOR_NAME = "local_bridge_collector"
+
+# TODO:
+# a) set these to container registry and image names
+# b) update settings for container apps to pull from same place
+REGISTRY_NAME :=
+REMOTE_REGISTRY :=
+
+IMAGE_OTELCOL :=
+IMAGE_SCC :=
+TAG_OTELCOL :=
+TAG_SCC :=
+
+# List of valid ENV values
+VALID_ENVS = staging meta public
+# default ENV
+ENV ?= staging
+
+# List of all tools
+TOOLS = az terraform tflint aztfexport
+# TODO: make install-tools recipe
+
+# Terraform commands are executed in the terraform/environments/$(ENV) directory
+TF_DIR := terraform
+TF_ENV_DIR := environments/$(ENV)
+TF_PLAN := $(TF_ENV_DIR)/terraform.tfplan
+TF_STATE := $(TF_DIR)/terraform.tfstate
+
+# AZTFX_RG and AZTFX_DIR are required (from the environment) for the export rule
+AZTFX_RG := $(shell echo $$AZTFX_RG)
+AZTFX_DIR := _export_$(shell echo $$AZTFX_DIR)
+AZTFX_FLAGS := --parallelism 1 --non-interactive --provider-version 3.93.0 --continue --include-role-assignment --output-dir $(AZTFX_DIR)
+
+# filesets
+TFLINT_CACHE = .tflint.d
+TF_CLEAN_FILES = *.tfstate *.tfstate.backup .terraform $(TF_DIR)/{*.tfstate*,.terraform.lock.hcl} $(TF_DIR)/$(TF_ENV_DIR)/{*.tfstate*,.terraform.lock.hcl}
+
+######################################################################
+# Project level rules - run in the ./terraform directory
+######################################################################
+
+all: check plan apply upload-collector-configs
+
+.PHONY: login
+login:
+	@echo "Logging in to Azure"
+	az login
+
+.PHONY: check
+check: fmt validate lint
+
+
+######################################################################
+# Environment level rules - run in $(TF_ENV_DIR) which is defined
+# as ./terraform/environments/$(ENV)
+######################################################################
+
+.PHONY: init
+init:
+	@echo "Initializing Terraform for environment $(ENV)..."
+	cd $(TF_DIR) && terraform init
+
+.PHONY: fmt
+fmt:
+	@echo "Formatting Terraform configurations..."
+	cd $(TF_DIR) && terraform fmt --recursive .
+
+.PHONY: validate
+validate: init
+	@echo "Validating Terraform configurations..."
+	cd $(TF_DIR) && terraform validate
+
+.PHONY: lint
+lint:
+	cd $(TF_DIR) && tflint --init && tflint
+
+# $(TF_PLAN): init
+.PHONY: plan
+plan:
+	@echo "Creating Terraform plan for environment $(ENV)..."
+	cd $(TF_DIR) && terraform plan -out=$(TF_PLAN)
+
+# NOTE: apply consumes the plan file produced by the plan rule (the default
+# `all` target runs plan first). Keep in mind this setup is intended for
+# development and prototyping work only.
+.PHONY: apply
+apply:
+	@echo "Applying Terraform plan for environment $(ENV)..."
+	# TODO: make sure we have a plan file before applying
+	cd $(TF_DIR) && terraform apply -auto-approve $(TF_PLAN)
+
+destroy:
+	@echo "Destroying infrastructure for environment $(ENV)..."
+	cd $(TF_DIR) && terraform destroy -lock=false
+
+output:
+	@echo "Showing Terraform outputs for environment $(ENV)..."
+	cd $(TF_DIR) && terraform output
+
+show-eventhub-connection:
+	cd $(TF_DIR) && terraform output eventhub_connection_string
+
+show-storage-account-connection:
+	cd $(TF_DIR) && terraform output storage_account_connection_string
+
+show-connection-strings: show-eventhub-connection show-storage-account-connection
+
+.PHONY: clean
+clean: destroy
+
+# extra clean
+.PHONY: distclean
+distclean: clean
+	@echo "Cleaning up Terraform files for environment $(ENV)..."
+	cd $(TF_DIR) && rm -rf $(TFLINT_CACHE) $(TF_CLEAN_FILES) $(TF_PLAN)
+
+.PHONY: export
+export:
+	aztfexport resource-group $(AZTFX_FLAGS) $(AZTFX_RG)
+
+#############################
+# Development workflow setup
+#############################
+
+# Report whether a tool is installed
+define check_tool
+	@command -v $(1) >/dev/null 2>&1 && echo $(1): `which $(1)` || echo $(1): UNINSTALLED
+endef
+
+check-tools:
+	$(foreach tool,$(TOOLS),$(call check_tool,$(tool));)
+
+# Fail if a tool is not installed (distinct name so it does not clobber the
+# reporting-only check_tool above)
+define require_tool
+	@command -v $(1) >/dev/null 2>&1 || (echo "Error: $(1) is not installed. Please install $(1)." && exit 1)
+endef
+
+# Check tools before running a target
+define check_tools_before_target
+	$(foreach tool,$(1),$(call require_tool,$(tool));)
+endef
+
+.PHONY: check-env
+# Check if ENV is valid
+check-env:
+	@if ! echo " $(VALID_ENVS) " | grep -q " $(ENV) "; then \
+		echo "Error: Invalid ENV value. ENV must be one of: $(VALID_ENVS)"; \
+		exit 1; \
+	fi
+
+.PHONY: refresh
+refresh: plan
+	@echo "Applying refresh-only Terraform plan for environment $(ENV)..."
+	cd $(TF_DIR) && terraform apply -refresh-only -auto-approve $(TF_PLAN)
+
+# Pull from another registry and push to ACR
+.PHONY: docker-pull-and-push
+docker-pull-and-push:
+	docker pull $(REMOTE_REGISTRY)/$(IMAGE_SCC):$(TAG_SCC)
+	docker tag $(REMOTE_REGISTRY)/$(IMAGE_SCC):$(TAG_SCC) $(REGISTRY_NAME).azurecr.io/$(IMAGE_SCC):$(TAG_SCC)
+	az acr login --name $(REGISTRY_NAME)
+	docker push $(REGISTRY_NAME).azurecr.io/$(IMAGE_SCC):$(TAG_SCC)
+
+.PHONY: upload-collector-configs
+upload-collector-configs:
+	az storage file upload \
+		--share-name staging-sgc-otelcol-cfg-share-$(PROJECT_ID) \
+		--source collector-configs/eventhub-only.yaml \
+		--account-name sccstagingsa$(PROJECT_ID) \
+		--auth-mode login \
+		--enable-file-backup-request-intent \
+		--account-key $(shell az storage account keys list --account-name sccstagingsa$(PROJECT_ID) --query '[0].value' -o tsv)
+	az storage file upload \
+		--share-name staging-sgc-scc-cfg-share-$(PROJECT_ID) \
+		--source scc-config/config.yaml \
+		--account-name sccstagingsa$(PROJECT_ID) \
+		--auth-mode login \
+		--enable-file-backup-request-intent \
+		--account-key $(shell az storage account keys list --account-name sccstagingsa$(PROJECT_ID) --query '[0].value' -o tsv)
+	make restart-collector
+
+restart-collector:
+	@echo "For new config files to take effect, you must restart the collector"
+	@echo "in the portal."
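+
+# ---------------------------------------------------------------------------
+# Example invocations (a sketch only; the ENV, resource group, and PROJECT_ID
+# values below are placeholders -- substitute your own):
+#
+#   make check-env ENV=public              # verify ENV is one of VALID_ENVS
+#   make all ENV=staging                   # check, plan, apply, upload configs
+#   make upload-collector-configs PROJECT_ID=4a9023
+#   AZTFX_RG=my-resource-group AZTFX_DIR=staging make export
+# ---------------------------------------------------------------------------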
diff --git a/azure/Makefile.Common b/azure/Makefile.Common
deleted file mode 100644
index 1d85561..0000000
--- a/azure/Makefile.Common
+++ /dev/null
@@ -1,26 +0,0 @@
-
-.PHONY: run
-run: apply-terraform run-collector
-
-.PHONY: stop
-stop: stop-collector destroy-terraform
-
-.PHONY: get-logs
-get-logs:
-	docker logs $(EXAMPLE_NAME) >& ../../logs.txt
-
-.PHONY: apply-terraform
-apply-terraform:
-	terraform init && terraform apply --auto-approve
-
-.PHONY: destroy-terraform
-destroy-terraform:
-	terraform destroy --auto-approve
-
-.PHONY: run-collector
-run-collector:
-	docker run -d --name=$(EXAMPLE_NAME) -v ./:/conf/ -e AZURE_CLIENT_ID=${AZURE_CLIENT_ID} -e AZURE_CLIENT_SECRET=${AZURE_CLIENT_SECRET} -e AZURE_SUBSCRIPTION_ID=${AZURE_SUBSCRIPTION_ID} -e AZURE_TENANT_ID=${AZURE_TENANT_ID} -e LS_ACCESS_TOKEN=${LS_ACCESS_TOKEN} otel/opentelemetry-collector-contrib:0.81.0 --config=/conf/collector.yaml
-
-.PHONY: stop-collector
-stop-collector:
-	docker rm -f $(EXAMPLE_NAME)
diff --git a/azure/README.md b/azure/README.md
index a2e1e51..a2ab27c 100644
--- a/azure/README.md
+++ b/azure/README.md
@@ -1,78 +1,126 @@
----
-# Ingest metrics using the Azure integration
+# SGC Demo
-The OTel Collector has a variety of [third party receivers](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/master/receiver) that provide integration with a wide variety of metric sources.
+This directory contains all the Terraform configurations required to manage the infrastructure for Azure-based log ingest. It's structured to support multiple environments (e.g., staging, public, etc.), although that now seems like overkill at this stage.
-Please note that not all metrics receivers available for the OpenTelemetry Collector have been tested by ServiceNow Cloud Observability Observability, and there may be bugs or unexpected issues in using these contributed receivers with ServiceNow Cloud Observability Observability metrics. File any issues with the appropriate OpenTelemetry community.
-{: .callout}
+## Status and Work Scope
-## Prerequisites for local installation
+This demo is in development and is not ready for production use. However, it can deploy a variety of resources to Azure that send logs to the cloud observability backend.
-You must have a ServiceNow Cloud Observability Observability [access token](/docs/create-and-manage-access-tokens) for the project to report metrics to.
-Also you must have Azure account credentials.
+See the PRD [CI Categories](https://lightstep.atlassian.net/wiki/spaces/EPD/pages/3181019147/PRD+Service+Graph+Connector+for+OpenTelemetry+v1.4). The categories listed in that document for Azure are: Datacenters and Resource Groups, Compute, Database and Storage, Load Balancers and Networking.
-## Running the Example
+You can see the logs sent to the cloud observability backend in [Lightstep Observability](https://app.lightstep.com/internal-sgcdev-azure/logs).
-### 1. Apply terraform configuration to create Azure test env
+### Would be nice to have ...
-First you'll need to create a resource.
+* Modules to send load (`k6`, `ab`, etc.) to generate additional telemetry.
+* Modules covering other Azure resources.
-Terraform requires Azure account credentials ARM_CLIENT_ID, ARM_CLIENT_SECRET, ARM_SUBSCRIPTION_ID, ARM_TENANT_ID to be set as ENV variables.
+### Need TODO ...
-```bash
-make apply-terraform
-```
+* Reduce module interfaces to essentials.
+* Remove TFVARS file and manually set defaults in module variables.
+* Fix the Collector container spec so we can alternate between builds (e.g., point to the official container registry or to ACR).
+* Set up an ACR registry for containers.
+* Add a Terraform backend for Azure.
-### 2. Run collector
-Collector requires Azure account credentials AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, AZURE_SUBSCRIPTION_ID, AZURE_TENANT_ID to be set as ENV variables.
+## Quick Notes on Azure
-```bash
-make run-collector
-```
+
+If you aren't acquainted with Azure, take note of the following:
+
+- Azure has a concept of Resource Groups, which logically group resources so you can manage them together. Resource groups can't be nested.
+- Azure's resource log format is JSON. The top-level schema is common to all resource logs, and the `properties` key holds the attributes specific to each resource type's schema.
+- Availability Zones are a relatively new concept in Azure. Availability was previously managed by having customers specify Availability Sets, and the platform has only partially migrated.
+- `resourceId` is the typical spelling of Azure's partially readable resource identifiers (analogous to ARNs). It includes the subscription ID, resource group, resource type, and name.
+- Telemetry received from Log Analytics Workspaces has a different schema than telemetry received from EventHubs; some fields are present in only one or the other.
+
+## Resources Monitored
+
+### Compute/Network
+
+- [x] Virtual Machine (no enhanced metrics with Azure agent deployed on host)
+- [x] Load Balancer
+- [x] Virtual Network
+- [x] Network Interface
+- [x] Public IP Address
+- [x] Network Security Group
+
+### Storage
+
+- [x] Storage Account
+- [x] SQL Database / SQL Server
-## Configuration
-
-Installation of the OpenTelemetry Collector varies, please refer to the [collector documentation](https://opentelemetry.io/docs/collector/) for more information.
-
-Detailed description of available [Azure metrics per service](https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported).
-
-Collector Azure Monitor receiver has to be configured to capture required Azure resources, [configuration description](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/azuremonitorreceiver#configuration).
-
-The following example configuration collects metrics from Azure and send them to ServiceNow Cloud Observability Observability:
-
-```yaml
-receivers:
-  azuremonitor:
-    subscription_id: "${AZURE_SUBSCRIPTION_ID}"
-    tenant_id: "${AZURE_TENANT_ID}"
-    client_id: "${AZURE_CLIENT_ID}"
-    client_secret: "${AZURE_CLIENT_SECRET}"
-    resource_groups:
-      - "example-resources"
-    services:
-      - "microsoft.compute/disks"
-      - "Microsoft.Network/networkInterfaces"
-      - "Microsoft.Compute/virtualMachines"
-    collection_interval: 60s
-
-exporters:
-  logging:
-    loglevel: debug
-  otlp/public:
-    endpoint: ingest.lightstep.com:443
-    headers:
-      "lightstep-access-token": "${LS_ACCESS_TOKEN}"
-
-processors:
-  batch:
-
-service:
-  pipelines:
-    metrics/azuremonitor:
-      receivers: [azuremonitor]
-      processors: [batch]
-      exporters: [logging, otlp/public]
-```
+## Architecture
+### Overview
+
+> **NOTE**
+> The previous architecture used Log Analytics Workspaces as an intermediary to send logs to EventHubs. The current architecture is not only simpler, it's also preferable for consistency of format and availability of data. Additionally, using Log Analytics Workspaces adds cost.
+
+- Azure resources that emit resource logs are configured with diagnostic settings, which cause logs to be sent to an EventHub.
+- An EventHub is an Azure-managed messaging service that speaks AMQP by default. The Collector reads from the EventHub and sends the logs to the cloud observability backend; config sketches for both hops follow below.
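+
+As a rough illustration of the first hop, a diagnostic setting that streams a resource's logs to an EventHub looks roughly like the sketch below. This is an assumption-laden example, not the module code in this repo: the resource names, the `allLogs` category group, and the authorization rule are all illustrative and depend on the resource type being monitored.
+
+```hcl
+# Sketch: stream resource logs from a load balancer to an EventHub.
+resource "azurerm_monitor_diagnostic_setting" "lb_to_eventhub" {
+  name                           = "send-to-eventhub"
+  target_resource_id             = azurerm_lb.example.id
+  eventhub_name                  = azurerm_eventhub.logs.name
+  eventhub_authorization_rule_id = azurerm_eventhub_namespace_authorization_rule.send.id
+
+  enabled_log {
+    # Category groups/categories vary per resource type.
+    category_group = "allLogs"
+  }
+}
+```
+
+For the second hop, a minimal Collector configuration using the contrib `azureeventhub` receiver might look like the following sketch. The connection string is a placeholder; the OTLP exporter settings mirror the ones used elsewhere in this repo.
+
+```yaml
+receivers:
+  azureeventhub:
+    connection: "Endpoint=sb://<namespace>.servicebus.windows.net/;SharedAccessKeyName=<name>;SharedAccessKey=<key>;EntityPath=<hub>"
+    format: azure
+
+exporters:
+  otlp/public:
+    endpoint: ingest.lightstep.com:443
+    headers:
+      "lightstep-access-token": "${LS_ACCESS_TOKEN}"
+
+service:
+  pipelines:
+    logs:
+      receivers: [azureeventhub]
+      exporters: [otlp/public]
+```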
+
+There are problems with sending the logs through Log Analytics Workspaces; avoid that path if possible.
+
+### Flow Diagram
+
+```
+**Diagnostic Settings** :arrow_right: **EventHub** :arrow_right: **Otel Collector** :arrow_right: **Cloud Observability**
+```
+
+## Prerequisites
+
+- Terraform installed and accessible in your PATH
+- Docker to build, tag, push, or debug
+- An Azure subscription
+- An authenticated Azure CLI session or configured service principal for Terraform to access your Azure account. This is required to load config files to storage and might be required to load code for functions in the future. You will need permissions to create and manage resources in the target subscription.
+- Environment variables for the Azure CLI session or service principal:
+  - `ARM_CLIENT_ID`
+  - `ARM_CLIENT_SECRET`
+  - `ARM_SUBSCRIPTION_ID`
+  - `ARM_TENANT_ID`
+- An environment variable named `LS_ACCESS_TOKEN` for sending logs to the cloud observability backend.
+
+## Directory Layout
+
+- **`terraform/modules/`**: This includes `logs` and `common`, and will include `apps` as we migrate demos for metrics.
+
+- **`terraform/environments/`**: This contains subdirectories for environments. It aligns with a common approach to managing configs across multiple environments, but I think the structure is overkill for this project.
+
+## Using the Makefile
+
+The Makefile in the root of this directory provides commands to manage the complete lifecycle of the infrastructure for any specified environment. The available commands are:
+
+- **`all`**: Runs `check` (`fmt`, `validate`, `lint`), `plan`, `apply`, and `upload-collector-configs`.
+- If you just want the environment to send logs to Cloud Observability, you're done after `all` completes. Because `terraform apply` doesn't trigger a local file (the collector config) to upload, later config changes require the `upload-collector-configs` rule to push a fresh config to the storage account. If you're developing the Terraform IaC, `all` is the command you'll use most, because it formats, validates, plans, and applies.
+- If you're developing Collector configuration, you'll need to upload the config and then manually restart the collector. The `restart-collector` rule currently only prints a reminder; an automated restart isn't implemented yet.
+
+## TODO
+
+### Automation
+
+- [ ] Add a make rule to restart the Collector (bounce it to load new config)
+- [ ] Remove hard-coded values from the Makefile (just in `upload-collector-configs` for now)
+- [ ] Add a make rule to compare attributes (semconv check)
+
+## Work Recommended for the EventHub Receiver
+
+- Migrate to `github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs` from `github.com/Azure/azure-event-hubs-go/v3`.
+- The latter was deprecated and migration recommended in July 2023 (see: ).
+  - Improves performance
+  - Provides better authentication
+  - Can simplify code, depending on usage
+
+## Troubleshooting
+
+- **Problem: I changed the Collector config. Why didn't the telemetry change?**
+1. Be sure to upload the config. There's a rule in the Makefile for this.
+2. You may need to restart your container, which is currently a manual process.
+
+- **Problem: I ran `make` to run the default rule, but it errored.**
+1. Get the unique project ID that the run emitted and export it to your environment with `export PROJECT_ID=${project_id}`.
+2. Run `make` again.
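+
+A minimal example of the fix (the ID below is the placeholder from the Makefile; use the value your own first run printed):
+
+```bash
+export PROJECT_ID=4a9023
+make
+```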
+
+- **Problem: Something else is stopping the pipeline after running `make` or another form of `apply`.**
+1. Check the environment variables for your containers. Resetting them after an apply is a manual step.
+2. Check the log stream. In Container Apps you'll find the log stream in the left-nav bar under "Monitoring".
diff --git a/azure/WORKFLOW.md b/azure/WORKFLOW.md
new file mode 100644
index 0000000..0007b8e
--- /dev/null
+++ b/azure/WORKFLOW.md
@@ -0,0 +1,65 @@
+# Developing Azure Terraform
+
+## Workflow Steps
+
+The workflow is presented step by step, but in reality you'll iterate on this process.
+
+### Configure your environment
+
+Set up the configuration you want using the Azure portal (portal.azure.com). Unless you're very experienced with the platform, this is the easiest way to start greenfield work in Terraform, since we can export the result to our Terraform config later, and the portal shows you all of the available settings.
+
+### Export to Terraform
+
+Azure maintains `aztfexport`, which saves a lot of time in getting a baseline Terraform config. However, I found that it failed every time until I set the override flags `--parallelism 1 --provider-version 3.93.0 --continue`. I then found that running my setup automatically worked much better with `--non-interactive --output-dir $(EXPORT_DIR)`.
+
+So I settled on this:
+```bash
+aztfexport resource-group --parallelism 1 --non-interactive --provider-version 3.93.0 --continue --output-dir $(EXPORT_DIR) $(RESOURCE_GROUP_NAME)
+```
+
+Your mileage may vary in needing these flags.
+
+Without them I found my exports sat at "initializing" until they timed out. With these flags it still says it's initializing for a while, but you eventually see resources being processed, and the export wraps up within a few minutes.
+
+
+### Delete the bogus resources
+
+If you tinker with the log_analytics_workspace at all, and maybe even if you don't, the export will include a lot of resources called "azurerm_log_analytics_saved_search". These may be a significant portion of the config.
+
+### Fix the resource graph
+
+Normally, Terraform resources that refer to **another_resource** do so by referring to its attributes. For example, look at the links in these mocks of an Azure resource `"azurerm_widget"` that lives in an `"azurerm_widget_namespace"`:
+
+```hcl
+resource "azurerm_widget_namespace" "wn" {
+  name = "My Widget Namespace"
+}
+
+resource "azurerm_widget" "w" {
+  name = "My Widget"
+  namespace_name = azurerm_widget_namespace.wn.name
+}
+```
+
+Fixing the graph is a matter of replacing the literal strings in the referring resource's settings with references to the attribute instead. The pattern of these intra-module references is `{resource_type}.{resource_name}.{attribute}`, for example `azurerm_widget.w.name`. You can keep your explicit `depends_on` settings, but don't rely on matching strings.
+
+Many of the resources you manage will have an attribute set like this:
+`resource_group_name = azurerm_resource_group.rg.name`
+
+### Replace resource names
+
+`aztfexport` will name resources by a numerical sequence like `res-1`, `res-2`, and so on. It's preferable to replace these with something more meaningful.
+
+If you use a simple bulk find-and-replace tool without word-boundary matching, make sure you start with the higher numbers and work backwards. Otherwise your operation to replace "res-1" could replace the first characters of "res-11" through "res-19".
+
+### Use locals for patterns to keep names consistent
+
+If you reuse something that should stay consistent, like a "name_prefix", define it in a `locals` block and refer to it as `local.val_name`, as in the sketch below.
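+
+A minimal sketch of the pattern (the prefix value and the resource shown are illustrative, not taken from this repo's modules):
+
+```hcl
+locals {
+  # Shared naming prefix; change it once here instead of in every resource.
+  name_prefix = "sgc-staging"
+}
+
+resource "azurerm_resource_group" "rg" {
+  name     = "${local.name_prefix}-rg"
+  location = "eastus"
+}
+```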
+ +### You only need to get all of this right when your config works + +You only have to do as much of this as useful during iterative development. diff --git a/azure/aad_domainservices/metrics.csv b/azure/aad_domainservices/metrics.csv deleted file mode 100644 index 15694a2..0000000 --- a/azure/aad_domainservices/metrics.csv +++ /dev/null @@ -1,11 +0,0 @@ -Name,Description,Unit,DataType,Attributes,Default Aggregation -azure_\directoryservices(ntds)\ldap_searches/sec_average;azure_\directoryservices(ntds)\ldap_searches/sec_count;azure_\directoryservices(ntds)\ldap_searches/sec_maximum;azure_\directoryservices(ntds)\ldap_searches/sec_minimum;azure_\directoryservices(ntds)\ldap_searches/sec_total,"This metric indicates the average number of searches per second for the NTDS object. It is backed by performance counter data from the domain controller, and can be filtered or splitted by role instance.",CountPerSecond,Gauge,azuremonitor.subscription_id;azuremonitor.tenant_id;azuremonitor.resource_id;location;metadata_datacenter;metadata_tenant;metadata_role;metadata_roleinstance;metadata_scaleunit,average -azure_\dns\total_query_received/sec_average;azure_\dns\total_query_received/sec_count;azure_\dns\total_query_received/sec_maximum;azure_\dns\total_query_received/sec_minimum;azure_\dns\total_query_received/sec_total,"This metric indicates the average number of queries received by DNS server in each second. It is backed by performance counter data from the domain controller, and can be filtered or splitted by role instance.",CountPerSecond,Gauge,azuremonitor.subscription_id;azuremonitor.tenant_id;azuremonitor.resource_id;location;metadata_datacenter;metadata_tenant;metadata_role;metadata_roleinstance;metadata_scaleunit,average -azure_\process(dns)\%_processor_time_average;azure_\process(dns)\%_processor_time_count;azure_\process(dns)\%_processor_time_maximum;azure_\process(dns)\%_processor_time_minimum;azure_\process(dns)\%_processor_time_total,"This metric indicates the percentage of elapsed time that all of dns process threads used the processor to execute instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this count. It is backed by performance counter data from the domain controller, and can be filtered or splitted by role instance.",Percent,Gauge,azuremonitor.subscription_id;azuremonitor.tenant_id;azuremonitor.resource_id;location;metadata_datacenter;metadata_tenant;metadata_role;metadata_roleinstance;metadata_scaleunit,average -azure_\dns\total_response_sent/sec_average;azure_\dns\total_response_sent/sec_count;azure_\dns\total_response_sent/sec_maximum;azure_\dns\total_response_sent/sec_minimum;azure_\dns\total_response_sent/sec_total,"This metric indicates the average number of reponses sent by DNS server in each second. 
It is backed by performance counter data from the domain controller, and can be filtered or splitted by role instance.",CountPerSecond,Gauge,azuremonitor.subscription_id;azuremonitor.tenant_id;azuremonitor.resource_id;location;metadata_datacenter;metadata_tenant;metadata_role;metadata_roleinstance;metadata_scaleunit,average -azure_\process(lsass)\%_processor_time_average;azure_\process(lsass)\%_processor_time_count;azure_\process(lsass)\%_processor_time_maximum;azure_\process(lsass)\%_processor_time_minimum;azure_\process(lsass)\%_processor_time_total,"This metric indicates the percentage of elapsed time that all of lsass process threads used the processor to execute instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this count. It is backed by performance counter data from the domain controller, and can be filtered or splitted by role instance.",Percent,Gauge,azuremonitor.subscription_id;azuremonitor.tenant_id;azuremonitor.resource_id;location;metadata_datacenter;metadata_tenant;metadata_role;metadata_roleinstance;metadata_scaleunit,average -azure_\memory\%_committed_bytes_in_use_average;azure_\memory\%_committed_bytes_in_use_count;azure_\memory\%_committed_bytes_in_use_maximum;azure_\memory\%_committed_bytes_in_use_minimum;azure_\memory\%_committed_bytes_in_use_total,"This metric indicates the ratio of Memory\Committed Bytes to the Memory\Commit Limit. Committed memory is the physical memory in use for which space has been reserved in the paging file should it need to be written to disk. The commit limit is determined by the size of the paging file. If the paging file is enlarged, the commit limit increases, and the ratio is reduced. This counter displays the current percentage value only; it is not an average. It is backed by performance counter data from the domain controller, and can be filtered or splitted by role instance.",Percent,Gauge,azuremonitor.subscription_id;azuremonitor.tenant_id;azuremonitor.resource_id;location;metadata_datacenter;metadata_tenant;metadata_role;metadata_roleinstance;metadata_scaleunit,average -azure_\directoryservices(ntds)\ldap_successful_binds/sec_average;azure_\directoryservices(ntds)\ldap_successful_binds/sec_count;azure_\directoryservices(ntds)\ldap_successful_binds/sec_maximum;azure_\directoryservices(ntds)\ldap_successful_binds/sec_minimum;azure_\directoryservices(ntds)\ldap_successful_binds/sec_total,"This metric indicates the number of LDAP successful binds per second for the NTDS object. It is backed by performance counter data from the domain controller, and can be filtered or splitted by role instance.",CountPerSecond,Gauge,azuremonitor.subscription_id;azuremonitor.tenant_id;azuremonitor.resource_id;location;metadata_datacenter;metadata_tenant;metadata_role;metadata_roleinstance;metadata_scaleunit,average -azure_\processor(_total)\%_processor_time_average;azure_\processor(_total)\%_processor_time_count;azure_\processor(_total)\%_processor_time_maximum;azure_\processor(_total)\%_processor_time_minimum;azure_\processor(_total)\%_processor_time_total,"This metric indicates the percentage of elapsed time that the processor spends to execute a non-Idle thread. It is calculated by measuring the percentage of time that the processor spends executing the idle thread and then subtracting that value from 100%. 
(Each processor has an idle thread that consumes cycles when no other threads are ready to run). This counter is the primary indicator of processor activity, and displays the average percentage of busy time observed during the sample interval. It should be noted that the accounting calculation of whether the processor is idle is performed at an internal sampling interval of the system clock (10ms). On todays fast processors, % Processor Time can therefore underestimate the processor utilization as the processor may be spending a lot of time servicing threads between the system clock sampling interval. Workload based timer applications are one example of applications which are more likely to be measured inaccurately as timers are signaled just after the sample is taken. It is backed by performance counter data from the domain controller, and can be filtered or splitted by role instance.",Percent,Gauge,azuremonitor.subscription_id;azuremonitor.tenant_id;azuremonitor.resource_id;location;metadata_datacenter;metadata_tenant;metadata_role;metadata_roleinstance;metadata_scaleunit,average -azure_\security_system-wide_statistics\kerberos_authentications_average;azure_\security_system-wide_statistics\kerberos_authentications_count;azure_\security_system-wide_statistics\kerberos_authentications_maximum;azure_\security_system-wide_statistics\kerberos_authentications_minimum;azure_\security_system-wide_statistics\kerberos_authentications_total,"This metric indicates the number of times that clients use a ticket to authenticate to this computer per second. It is backed by performance counter data from the domain controller, and can be filtered or splitted by role instance.",CountPerSecond,Gauge,azuremonitor.subscription_id;azuremonitor.tenant_id;azuremonitor.resource_id;location;metadata_datacenter;metadata_tenant;metadata_role;metadata_roleinstance;metadata_scaleunit,average -azure_\security_system-wide_statistics\ntlm_authentications_average;azure_\security_system-wide_statistics\ntlm_authentications_count;azure_\security_system-wide_statistics\ntlm_authentications_maximum;azure_\security_system-wide_statistics\ntlm_authentications_minimum;azure_\security_system-wide_statistics\ntlm_authentications_total,"This metric indicates the number of NTLM authentications processed per second for the Active Directory on this domain contrller or for local accounts on this member server. 
It is backed by performance counter data from the domain controller, and can be filtered or splitted by role instance.",CountPerSecond,Gauge,azuremonitor.subscription_id;azuremonitor.tenant_id;azuremonitor.resource_id;location;metadata_datacenter;metadata_tenant;metadata_role;metadata_roleinstance;metadata_scaleunit,average diff --git a/azure/apimanagement_service/dashboards/overview/main.tf b/azure/apimanagement_service/dashboards/overview/main.tf deleted file mode 100644 index fa01fdd..0000000 --- a/azure/apimanagement_service/dashboards/overview/main.tf +++ /dev/null @@ -1,226 +0,0 @@ -terraform { - required_providers { - lightstep = { - source = "lightstep/lightstep" - version = "~> 1.86.1" - } - } - required_version = ">= v1.0.11" -} - -variable "lightstep_project" { - description = "ServiceNow Cloud Observability Project Name" - type = string -} - -output "dashboard_url" { - value = "https://app.lightstep.com/${var.lightstep_project}/dashboard/${lightstep_dashboard.azure_apimanagement_service_overview.id}" - description = "OpenTelemetry Collector API Management Service Dashboard URL" -} - -resource "lightstep_dashboard" "azure_apimanagement_service_overview" { - project_name = var.lightstep_project - dashboard_name = "API Management Service Metrics" - dashboard_description = "Monitor API Management Service with this metrics overview dashboard." - - chart { - name = "Requests" - rank = "0" - type = "timeseries" - - query { - query_name = "a" - display = "line" - hidden = false - query_string = <