Skip to content
This repository was archived by the owner on Mar 6, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
292 changes: 292 additions & 0 deletions azure/machinelearningservices_workspaces/dashboards/overview/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
# Terraform settings for this module: pins the Lightstep provider and the
# minimum Terraform CLI version.
terraform {
  required_providers {
    # Provider that supplies the `lightstep_dashboard` resource used in this file.
    lightstep = {
      source  = "lightstep/lightstep"
      version = "~> 1.76.0" # pessimistic constraint: only 1.76.x patch releases
    }
  }

  # NOTE(review): the constraint is written with a leading "v"; confirm this
  # spelling is intentional for the Terraform versions this module targets.
  required_version = ">= v1.0.11"
}

# Input: name of the project the dashboard is created in. It is passed to the
# dashboard's `project_name` and is also interpolated into the `dashboard_url`
# output.
variable "lightstep_project" {
  description = "Cloud Observability Project Name"
  type        = string
}

# Output: browser URL of the dashboard defined in this file, assembled from the
# project name variable and the `id` attribute of the dashboard resource.
output "dashboard_url" {
  value       = "https://app.lightstep.com/${var.lightstep_project}/dashboard/${lightstep_dashboard.azure_machinelearningservices_workspaces_overview.id}"
  description = "OpenTelemetry Collector Machine Learning Services Workspaces Dashboard URL"
}

# Overview dashboard for Azure Machine Learning Services Workspaces metrics.
#
# Layout:
#   * 15 timeseries charts (rank "0" through "14"), each with a single visible
#     line query named "a".
#   * 7 template variables, declared at the bottom of the resource, that the
#     queries reference as `$<name>`.
#
# Every query has the same shape:
#   metric <name> | filter (<template-variable equality checks>) | delta | group_by [], sum
#
# All charts filter on subscription, tenant, resource id and location. On top
# of that, each chart adds one of three attribute sets:
#   * instance + compute name  -> CPU, memory, network and disk charts
#   * scenario                 -> run, error, warning, core and node charts
#   * both of the above        -> GPU energy chart
#
# The query heredocs use `<<EOT` (not `<<-EOT`), so their content and the
# closing `EOT` marker are intentionally kept at column 0.
#
# NOTE(review): every query applies `delta`, including those on metrics whose
# names end in `_average`; confirm that aligner is appropriate for the metric
# kind these are ingested as.
# NOTE(review): several chart titles use "Mb" while the underlying metric names
# say "megabytes"; confirm the intended unit label.
resource "lightstep_dashboard" "azure_machinelearningservices_workspaces_overview" {
  project_name          = var.lightstep_project
  dashboard_name        = "Machine Learning Services Workspaces Metrics"
  dashboard_description = "[Beta] Monitor Machine Learning Services Workspaces with this metrics overview dashboard."

  # --- Compute utilization (filtered by instance id and compute name) ---

  chart {
    name = "CPU Utilization %"
    rank = "0"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_cpuutilizationpercentage_average | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_instanceid == $metadata_instanceid) && (metadata_computename == $metadata_computename)) | delta | group_by [], sum
EOT
    }
  }

  chart {
    name = "CPU Memory Utilization %"
    rank = "1"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_cpumemoryutilizationpercentage_average | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_instanceid == $metadata_instanceid) && (metadata_computename == $metadata_computename)) | delta | group_by [], sum
EOT
    }
  }

  # --- Network traffic (filtered by instance id and compute name) ---

  chart {
    name = "Network Input Mb"
    rank = "2"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_networkinputmegabytes_average | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_instanceid == $metadata_instanceid) && (metadata_computename == $metadata_computename)) | delta | group_by [], sum
EOT
    }
  }

  chart {
    name = "Network Output Mb"
    rank = "3"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_networkoutputmegabytes_average | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_instanceid == $metadata_instanceid) && (metadata_computename == $metadata_computename)) | delta | group_by [], sum
EOT
    }
  }

  # --- Run outcomes and diagnostics (filtered by scenario) ---

  chart {
    name = "Errors"
    rank = "4"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_errors_total | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_scenario == $metadata_scenario)) | delta | group_by [], sum
EOT
    }
  }

  chart {
    name = "Failed Runs"
    rank = "5"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_failed_runs_total | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_scenario == $metadata_scenario)) | delta | group_by [], sum
EOT
    }
  }

  chart {
    name = "Cancelled Runs"
    rank = "6"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_cancelled_runs_total | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_scenario == $metadata_scenario)) | delta | group_by [], sum
EOT
    }
  }

  chart {
    name = "Completed Runs"
    rank = "7"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_completed_runs_total | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_scenario == $metadata_scenario)) | delta | group_by [], sum
EOT
    }
  }

  chart {
    name = "Warnings"
    rank = "8"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_warnings_total | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_scenario == $metadata_scenario)) | delta | group_by [], sum
EOT
    }
  }

  # --- Disk I/O (filtered by instance id and compute name) ---

  chart {
    name = "Disk Write Mb"
    rank = "9"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_diskwritemegabytes_average | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_instanceid == $metadata_instanceid) && (metadata_computename == $metadata_computename)) | delta | group_by [], sum
EOT
    }
  }

  chart {
    name = "Disk Read Mb"
    rank = "10"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_diskreadmegabytes_average | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_instanceid == $metadata_instanceid) && (metadata_computename == $metadata_computename)) | delta | group_by [], sum
EOT
    }
  }

  # --- GPU energy (the only chart filtered by scenario AND instance/compute) ---

  chart {
    name = "GPU Energy Joules"
    rank = "11"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_gpuenergyjoules_total | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_scenario == $metadata_scenario) && (metadata_instanceid == $metadata_instanceid) && (metadata_computename == $metadata_computename)) | delta | group_by [], sum
EOT
    }
  }

  # --- Cluster capacity and pending cancellations (filtered by scenario) ---

  chart {
    name = "Active Cores Average"
    rank = "12"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_active_cores_average | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_scenario == $metadata_scenario)) | delta | group_by [], sum
EOT
    }
  }

  chart {
    name = "Active Nodes"
    rank = "13"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_active_nodes_average | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_scenario == $metadata_scenario)) | delta | group_by [], sum
EOT
    }
  }

  chart {
    name = "Cancel Requested Runs"
    rank = "14"
    type = "timeseries"

    query {
      query_name   = "a"
      display      = "line"
      hidden       = false
      query_string = <<EOT
metric azure_cancel_requested_runs_total | filter ((azuremonitor.subscription_id == $azuremonitor_subscription_id) && (azuremonitor.tenant_id == $azuremonitor_tenant_id) && (azuremonitor.resource_id == $azuremonitor_resource_id) && (location == $location) && (metadata_scenario == $metadata_scenario)) | delta | group_by [], sum
EOT
    }
  }

  # --- Template variables ---
  # Each variable is referenced as `$<name>` in the chart filters above. All of
  # them are declared with an empty `default_values` list, and each names the
  # attribute key used for value suggestions.

  template_variable {
    name                     = "azuremonitor_resource_id"
    default_values           = []
    suggestion_attribute_key = "azuremonitor.resource_id"
  }

  template_variable {
    name                     = "azuremonitor_subscription_id"
    default_values           = []
    suggestion_attribute_key = "azuremonitor.subscription_id"
  }

  template_variable {
    name                     = "azuremonitor_tenant_id"
    default_values           = []
    suggestion_attribute_key = "azuremonitor.tenant_id"
  }

  template_variable {
    name                     = "location"
    default_values           = []
    suggestion_attribute_key = "location"
  }

  template_variable {
    name                     = "metadata_computename"
    default_values           = []
    suggestion_attribute_key = "metadata_computename"
  }

  template_variable {
    name                     = "metadata_instanceid"
    default_values           = []
    suggestion_attribute_key = "metadata_instanceid"
  }

  template_variable {
    name                     = "metadata_scenario"
    default_values           = []
    suggestion_attribute_key = "metadata_scenario"
  }
}
Loading