From 7ce4bb31a6fcdb1a7c2c28dd3645d25520d661f0 Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Tue, 21 May 2024 05:24:48 +0000 Subject: [PATCH 01/15] disable custom metrics by default --- .../scripts/tomlparser-common-agent-config.rb | 17 +++++++++++++++++ build/linux/installer/conf/container.conf | 6 ++++++ build/linux/installer/conf/kube.conf | 9 ++++++++- .../templates/ama-logs-daemonset-windows.yaml | 2 ++ .../templates/ama-logs-daemonset.yaml | 2 ++ .../templates/ama-logs-deployment.yaml | 2 ++ charts/azuremonitor-containers/values.yaml | 3 +++ kubernetes/linux/main.sh | 8 +++++++- 8 files changed, 47 insertions(+), 2 deletions(-) diff --git a/build/common/installer/scripts/tomlparser-common-agent-config.rb b/build/common/installer/scripts/tomlparser-common-agent-config.rb index f3cc30c98..7eb8c3c19 100644 --- a/build/common/installer/scripts/tomlparser-common-agent-config.rb +++ b/build/common/installer/scripts/tomlparser-common-agent-config.rb @@ -11,6 +11,7 @@ @disableTelemetry = false @logEnableKubernetesMetadataCacheTTLSeconds = 60 @enableHighLogScaleMode = false +@enableCustomMetrics = false def is_windows? return !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0 @@ -81,6 +82,14 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end + if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? + enable_custom_metrics = parsedConfig[:agent_settings][:enable_custom_metrics] + if !enable_custom_metrics.nil? && !enable_custom_metrics[:enabled].nil? 
+ @enableCustomMetrics = enable_custom_metrics[:enabled] + puts "Using config map value: enabled = #{@enableCustomMetrics} for custom metrics" + end + end + rescue => errorStr puts "config::error:Exception while reading config settings for agent configuration setting - #{errorStr}, using defaults" end @@ -117,6 +126,11 @@ def get_command_windows(env_variable_name, env_variable_value) file.write(commands) end + if @enableCustomMetrics + commands = get_command_windows("ENABLE_CUSTOM_METRICS", @enableCustomMetrics) + file.write(commands) + end + commands = get_command_windows("AZMON_KUBERNETES_METADATA_CACHE_TTL_SECONDS", @logEnableKubernetesMetadataCacheTTLSeconds) file.write(commands) # Close file after writing all environment variables @@ -136,6 +150,9 @@ def get_command_windows(env_variable_name, env_variable_value) if @enableHighLogScaleMode file.write("export ENABLE_HIGH_LOG_SCALE_MODE=#{@enableHighLogScaleMode}\n") end + if @enableCustomMetrics + file.write("export ENABLE_CUSTOM_METRICS=#{@enableCustomMetrics}\n") + end file.write("export AZMON_KUBERNETES_METADATA_CACHE_TTL_SECONDS=#{@logEnableKubernetesMetadataCacheTTLSeconds}\n") # Close file after writing all environment variables file.close diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf index 5f987b0a5..149634836 100644 --- a/build/linux/installer/conf/container.conf +++ b/build/linux/installer/conf/container.conf @@ -8,6 +8,7 @@ bind 127.0.0.1 +#CustomMetricsStart # MDM metrics from telegraf @type tcp @@ -16,6 +17,7 @@ port 25228 format json +#CustomMetricsEnd # Container inventory @@ -33,6 +35,7 @@ @log_level info +#CustomMetricsStart #custom_metrics_mdm filter plugin @type cadvisor2mdm @@ -44,6 +47,7 @@ @type telegraf2mdm @log_level info +#CustomMetricsEnd #containerinventory @@ -97,6 +101,7 @@ keepalive true +#CustomMetricsStart @type mdm @log_level info @@ -113,6 +118,7 @@ retry_mdm_post_wait_minutes 30 +#CustomMetricsEnd #InsightsMetrics diff --git 
a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf index 5fbf1a378..5183bb0cb 100644 --- a/build/linux/installer/conf/kube.conf +++ b/build/linux/installer/conf/kube.conf @@ -59,13 +59,14 @@ keepalive true +#CustomMetricsStart #custom_metrics_mdm filter plugin for perf data from windows nodes @type cadvisor2mdm metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes @log_level info - +#CustomMetricsEnd #Kubernetes pod inventory @@ -194,6 +195,7 @@ keepalive true +#CustomMetricsStart @type mdm @id out_mdm_nodeinventory @@ -212,6 +214,7 @@ retry_mdm_post_wait_minutes 30 +#CustomMetricsEnd #Kubernetes events @@ -248,6 +251,7 @@ keepalive true +#CustomMetricsStart #Kubernetes podmdm inventory @@ -275,6 +279,7 @@ retry_mdm_post_wait_minutes 30 +#CustomMetricsEnd #Kubernetes perf inventory @@ -337,6 +342,7 @@ keepalive true +#CustomMetricsStart @type mdm @id out_mdm_perf @@ -355,4 +361,5 @@ retry_mdm_post_wait_minutes 30 +#CustomMetricsEnd diff --git a/charts/azuremonitor-containers/templates/ama-logs-daemonset-windows.yaml b/charts/azuremonitor-containers/templates/ama-logs-daemonset-windows.yaml index 29c767f52..2cacbf795 100644 --- a/charts/azuremonitor-containers/templates/ama-logs-daemonset-windows.yaml +++ b/charts/azuremonitor-containers/templates/ama-logs-daemonset-windows.yaml @@ -97,6 +97,8 @@ spec: fieldPath: metadata.name - name: SIDECAR_SCRAPING_ENABLED value: {{ .Values.amalogs.sidecarscraping | quote }} + - name: ENABLE_CUSTOM_METRICS + value: {{ .Values.amalogs.enableCustomMetrics | quote }} volumeMounts: # Uncomment when telegraf upgraded to 1.28.5 or higher # {{- if .Values.amalogs.enableServiceAccountTimeBoundToken }} diff --git a/charts/azuremonitor-containers/templates/ama-logs-daemonset.yaml b/charts/azuremonitor-containers/templates/ama-logs-daemonset.yaml index 6f305f33e..be82be35a 100644 --- a/charts/azuremonitor-containers/templates/ama-logs-daemonset.yaml +++ 
b/charts/azuremonitor-containers/templates/ama-logs-daemonset.yaml @@ -129,6 +129,8 @@ spec: {{- end }} - name: IS_CUSTOM_CERT value: {{ .Values.Azure.proxySettings.isCustomCert | quote }} + - name: ENABLE_CUSTOM_METRICS + value: {{ .Values.amalogs.enableCustomMetrics | quote }} - name: HOSTNAME valueFrom: fieldRef: diff --git a/charts/azuremonitor-containers/templates/ama-logs-deployment.yaml b/charts/azuremonitor-containers/templates/ama-logs-deployment.yaml index af6c8e63a..06c29d784 100644 --- a/charts/azuremonitor-containers/templates/ama-logs-deployment.yaml +++ b/charts/azuremonitor-containers/templates/ama-logs-deployment.yaml @@ -112,6 +112,8 @@ spec: {{- end }} - name: IS_CUSTOM_CERT value: {{ .Values.Azure.proxySettings.isCustomCert | quote }} + - name: ENABLE_CUSTOM_METRICS + value: {{ .Values.amalogs.enableCustomMetrics | quote }} - name: HOSTNAME valueFrom: fieldRef: diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml index 05340b3af..ffbca68ff 100644 --- a/charts/azuremonitor-containers/values.yaml +++ b/charts/azuremonitor-containers/values.yaml @@ -66,6 +66,9 @@ amalogs: # This flag to enable and disable service account timebound token and default is enabled enableServiceAccountTimeBoundToken: true + # This flag to enable and disable custom metrics and default is enabled + enableCustomMetrics: true + ## To get your workspace id and key do the following ## You can create a Azure Loganalytics workspace from portal.azure.com and get its ID & PRIMARY KEY from 'Advanced Settings' tab in the Ux. diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index 8c3d318b1..0026757e9 100644 --- a/kubernetes/linux/main.sh +++ b/kubernetes/linux/main.sh @@ -1080,12 +1080,18 @@ if [ "$AZMON_RESOURCE_OPTIMIZATION_ENABLED" != "true" ]; then if [ ! 
-e "/etc/config/kube.conf" ]; then if [ "$LOGS_AND_EVENTS_ONLY" != "true" ]; then echo "*** starting fluentd v1 in daemonset" + if [ "${ENABLE_CUSTOM_METRICS}" != "true" ]; then + sed -i '/^#CustomMetricsStart/,/^#CustomMetricsEnd/ s/^/# /' /etc/fluent/container.conf + fi fluentd -c /etc/fluent/container.conf -o /var/opt/microsoft/docker-cimprov/log/fluentd.log --log-rotate-age 5 --log-rotate-size 20971520 & else echo "Skipping fluentd since LOGS_AND_EVENTS_ONLY is set to true" fi else echo "*** starting fluentd v1 in replicaset" + if [ "${ENABLE_CUSTOM_METRICS}" != "true" ]; then + sed -i '/^#CustomMetricsStart/,/^#CustomMetricsEnd/ s/^/# /' /etc/fluent/kube.conf + fi fluentd -c /etc/fluent/kube.conf -o /var/opt/microsoft/docker-cimprov/log/fluentd.log --log-rotate-age 5 --log-rotate-size 20971520 & fi fi @@ -1220,7 +1226,7 @@ if [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE}" != "true" ]; then sed -i -e "s/placeholder_hostname/$nodename/g" $telegrafConfFile fi -if [ "${AZMON_RESOURCE_OPTIMIZATION_ENABLED}" == "true" ]; then +if [ "${AZMON_RESOURCE_OPTIMIZATION_ENABLED}" == "true" ] || [ "${ENABLE_CUSTOM_METRICS}" != "true" ]; then sed -i '/^#CustomMetricsStart/,/^#CustomMetricsEnd/ s/^/# /' $telegrafConfFile fi From 1d4cb67ac3f2ab11e84c5a8b58e087a2bd26893a Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Tue, 21 May 2024 06:13:01 +0000 Subject: [PATCH 02/15] missed changes --- build/linux/installer/conf/kube.conf | 2 ++ kubernetes/linux/main.sh | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf index 5183bb0cb..33c6b5106 100644 --- a/build/linux/installer/conf/kube.conf +++ b/build/linux/installer/conf/kube.conf @@ -164,10 +164,12 @@ keepalive true +#CustomMetricsStart @type inventory2mdm @log_level info +#CustomMetricsEnd #kubenodeinventory diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index 0026757e9..b5969b3ab 100644 --- a/kubernetes/linux/main.sh +++ 
b/kubernetes/linux/main.sh @@ -1255,7 +1255,7 @@ if [ ! -e "/etc/config/kube.conf" ] && [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE else echo "checking for listener on tcp #25226 and waiting for $WAITTIME_PORT_25226 secs if not.." waitforlisteneronTCPport 25226 $WAITTIME_PORT_25226 - if [ "${AZMON_RESOURCE_OPTIMIZATION_ENABLED}" != "true" ]; then + if [ "${AZMON_RESOURCE_OPTIMIZATION_ENABLED}" != "true" ] || [ "${ENABLE_CUSTOM_METRICS}" == true ]; then echo "checking for listener on tcp #25228 and waiting for $WAITTIME_PORT_25228 secs if not.." waitforlisteneronTCPport 25228 $WAITTIME_PORT_25228 fi From 116e554151a42c526d91325b2bba7a59b91b67b0 Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Tue, 21 May 2024 17:42:02 +0000 Subject: [PATCH 03/15] changes for windows --- build/windows/installer/conf/fluent.conf | 2 + .../installer/livenessprobe/livenessprobe.cpp | 17 +- kubernetes/windows/main.ps1 | 183 +++++++++++------- 3 files changed, 128 insertions(+), 74 deletions(-) diff --git a/build/windows/installer/conf/fluent.conf b/build/windows/installer/conf/fluent.conf index b3e423265..99cf3e24f 100644 --- a/build/windows/installer/conf/fluent.conf +++ b/build/windows/installer/conf/fluent.conf @@ -4,6 +4,7 @@ @log_level info +#CustomMetricsStart @type cadvisor_perf tag oms.api.cadvisorperf @@ -35,3 +36,4 @@ retry_mdm_post_wait_minutes 30 +#CustomMetricsEnd \ No newline at end of file diff --git a/build/windows/installer/livenessprobe/livenessprobe.cpp b/build/windows/installer/livenessprobe/livenessprobe.cpp index b309ece81..614d818af 100644 --- a/build/windows/installer/livenessprobe/livenessprobe.cpp +++ b/build/windows/installer/livenessprobe/livenessprobe.cpp @@ -114,12 +114,16 @@ int _tmain(int argc, wchar_t *argv[]) return NO_FLUENT_BIT_PROCESS; } - DWORD dwStatus = GetServiceStatus(argv[2]); - - if (dwStatus != SERVICE_RUNNING) + DWORD enableCustomMetrics = GetEnvironmentVariable(L"ENABLE_CUSTOM_METRICS", nullptr, 0); + DWORD msiMode = 
GetEnvironmentVariable(L"USING_AAD_MSI_AUTH", nullptr, 0); + if (enableCustomMetrics == "true" || msiMode != "true") { - wprintf_s(L"ERROR:Service:%s is not running\n", argv[2]); - return FLUENTDWINAKS_SERVICE_NOT_RUNNING; + DWORD dwStatus = GetServiceStatus(argv[2]); + if (dwStatus != SERVICE_RUNNING) + { + wprintf_s(L"ERROR:Service:%s is not running\n", argv[2]); + return FLUENTDWINAKS_SERVICE_NOT_RUNNING; + } } if (IsFileExists(argv[3])) @@ -134,7 +138,8 @@ int _tmain(int argc, wchar_t *argv[]) return CERTIFICATE_RENEWAL_REQUIRED; } - if (argc > 5) { + if (argc > 5) + { if (!IsProcessRunning(argv[5])) { wprintf_s(L"ERROR:Process:%s is not running\n", argv[5]); diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1 index 319cc1c93..9e2a4c5ff 100644 --- a/kubernetes/windows/main.ps1 +++ b/kubernetes/windows/main.ps1 @@ -45,7 +45,8 @@ function Test-FluentbitTcpListener { [System.Environment]::SetEnvironmentVariable("WAITTIME_PORT_25229", $waitTimeSecs, "Process") [System.Environment]::SetEnvironmentVariable("WAITTIME_PORT_25229", $waitTimeSecs, "Machine") Write-Host "Successfully set environment variable WAITTIME_PORT_25229 - $($waitTimeSecs) for target 'machine'..." 
- } else { + } + else { Write-Host "Failed to set environment variable WAITTIME_PORT_25229 for target 'machine' since it is either null or empty" $waitTimeSecs = 30 } @@ -187,13 +188,13 @@ function Set-AMA3PEnvironmentVariables { } function Generate-GenevaTenantNameSpaceConfig { - $genevaLogsTenantNameSpaces = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_TENANT_NAMESPACES", "process") + $genevaLogsTenantNameSpaces = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_TENANT_NAMESPACES", "process") if (![string]::IsNullOrEmpty($genevaLogsTenantNameSpaces)) { [System.Environment]::SetEnvironmentVariable("GENEVA_LOGS_TENANT_NAMESPACES", $genevaLogsTenantNameSpaces, "machine") $genevaLogsTenantNameSpacesArray = $genevaLogsTenantNameSpaces.Split(",") for ($i = 0; $i -lt $genevaLogsTenantNameSpacesArray.Length; $i = $i + 1) { - $tenantName = $genevaLogsTenantNameSpacesArray[$i] - Copy-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_tenant.conf -Destination C:/etc/fluent-bit/fluent-bit-geneva-logs_$tenantName.conf + $tenantName = $genevaLogsTenantNameSpacesArray[$i] + Copy-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_tenant.conf -Destination C:/etc/fluent-bit/fluent-bit-geneva-logs_$tenantName.conf (Get-Content -Path C:/etc/fluent-bit/fluent-bit-geneva-logs_$tenantName.conf -Raw) -replace '', $tenantName | Set-Content C:/etc/fluent-bit/fluent-bit-geneva-logs_$tenantName.conf } } @@ -201,18 +202,18 @@ function Generate-GenevaTenantNameSpaceConfig { } function Generate-GenevaInfraNameSpaceConfig { - $genevaLogsInfraNameSpaces = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_INFRA_NAMESPACES", "process") - if (![string]::IsNullOrEmpty($genevaLogsInfraNameSpaces)) { - [System.Environment]::SetEnvironmentVariable("GENEVA_LOGS_INFRA_NAMESPACES", $genevaLogsInfraNameSpaces, "machine") - $genevaLogsInfraNameSpacesArray = $genevaLogsInfraNameSpaces.Split(",") - for ($i = 0; $i -lt $genevaLogsInfraNameSpacesArray.Length; $i = $i + 1) { - 
$infraNameSpaceName = $genevaLogsInfraNameSpacesArray[$i] - $infraNamespaceWithoutSuffix = $infraNameSpaceName.TrimEnd("_*") - Copy-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_infra.conf -Destination C:/etc/fluent-bit/fluent-bit-geneva-logs_$infraNamespaceWithoutSuffix.conf + $genevaLogsInfraNameSpaces = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_INFRA_NAMESPACES", "process") + if (![string]::IsNullOrEmpty($genevaLogsInfraNameSpaces)) { + [System.Environment]::SetEnvironmentVariable("GENEVA_LOGS_INFRA_NAMESPACES", $genevaLogsInfraNameSpaces, "machine") + $genevaLogsInfraNameSpacesArray = $genevaLogsInfraNameSpaces.Split(",") + for ($i = 0; $i -lt $genevaLogsInfraNameSpacesArray.Length; $i = $i + 1) { + $infraNameSpaceName = $genevaLogsInfraNameSpacesArray[$i] + $infraNamespaceWithoutSuffix = $infraNameSpaceName.TrimEnd("_*") + Copy-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_infra.conf -Destination C:/etc/fluent-bit/fluent-bit-geneva-logs_$infraNamespaceWithoutSuffix.conf (Get-Content -Path C:/etc/fluent-bit/fluent-bit-geneva-logs_$infraNamespaceWithoutSuffix.conf -Raw) -replace '', $infraNameSpaceName | Set-Content C:/etc/fluent-bit/fluent-bit-geneva-logs_$infraNamespaceWithoutSuffix.conf - } - } - Remove-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_infra.conf + } + } + Remove-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_infra.conf } #register fluentd as a windows service @@ -291,39 +292,40 @@ function Set-EnvironmentVariables { } if (![string]::IsNullOrEmpty($isIgnoreProxySettings) -and $isIgnoreProxySettings.ToLower() -eq 'true') { Write-Host "Ignoring Proxy Setttings since IGNORE_PROXY_SETTINGS is - $($isIgnoreProxySettings)" - } else { - $proxy = "" - if (Test-Path /etc/ama-logs-secret/PROXY) { - # TODO: Change to ama-logs-secret before merging - $proxy = Get-Content /etc/ama-logs-secret/PROXY - Write-Host "Validating the proxy configuration since proxy configuration provided" - # valide the proxy endpoint configuration - if 
(![string]::IsNullOrEmpty($proxy)) { - $proxy = [string]$proxy.Trim(); + } + else { + $proxy = "" + if (Test-Path /etc/ama-logs-secret/PROXY) { + # TODO: Change to ama-logs-secret before merging + $proxy = Get-Content /etc/ama-logs-secret/PROXY + Write-Host "Validating the proxy configuration since proxy configuration provided" + # validate the proxy endpoint configuration if (![string]::IsNullOrEmpty($proxy)) { $proxy = [string]$proxy.Trim(); - $parts = $proxy -split "@" - if ($parts.Length -ne 2) { - Write-Host "Proxy is not using credentials..." - } - $subparts1 = $parts[0] -split "//" - if ($subparts1.Length -ne 2) { - Write-Host "Invalid ProxyConfiguration. EXITING....." - exit 1 - } - $protocol = $subparts1[0].ToLower().TrimEnd(":") - if (!($protocol -eq "http") -and !($protocol -eq "https")) { - Write-Host "Unsupported protocol in ProxyConfiguration $($proxy). EXITING....." - exit 1 - } + if (![string]::IsNullOrEmpty($proxy)) { + $proxy = [string]$proxy.Trim(); + $parts = $proxy -split "@" + if ($parts.Length -ne 2) { + Write-Host "Proxy is not using credentials..." + } + $subparts1 = $parts[0] -split "//" + if ($subparts1.Length -ne 2) { + Write-Host "Invalid ProxyConfiguration. EXITING....." + exit 1 + } + $protocol = $subparts1[0].ToLower().TrimEnd(":") + if (!($protocol -eq "http") -and !($protocol -eq "https")) { + Write-Host "Unsupported protocol in ProxyConfiguration $($proxy). EXITING....." 
+ exit 1 + } + } } + Write-Host "Provided Proxy configuration is valid" } - Write-Host "Provided Proxy configuration is valid" - } - if (Test-Path /etc/ama-logs-secret/PROXYCERT.crt) { + if (Test-Path /etc/ama-logs-secret/PROXYCERT.crt) { Write-Host "Importing Proxy CA cert since Proxy CA cert configured" Import-Certificate -FilePath /etc/ama-logs-secret/PROXYCERT.crt -CertStoreLocation 'Cert:\LocalMachine\Root' -Verbose } @@ -487,15 +489,16 @@ function Read-Configs { if (![string]::IsNullOrEmpty($enableFbitInternalMetrics) -and $enableFbitInternalMetrics.ToLower() -eq 'true') { Write-Host "Fluent-bit Internal metrics configured" - } else { + } + else { Clear-Content C:/etc/fluent-bit/fluent-bit-internal-metrics.conf } $genevaLogsMultitenancy = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_MULTI_TENANCY", "process") if (![string]::IsNullOrEmpty($genevaLogsMultitenancy)) { if ($genevaLogsMultitenancy.ToLower() -eq 'true') { - [System.Environment]::SetEnvironmentVariable("GENEVA_LOGS_MULTI_TENANCY", $genevaLogsMultitenancy, "machine") - Write-Host "Successfully set environment variable GENEVA_LOGS_MULTI_TENANCY - $($genevaLogsMultitenancy) for target 'machine'..." + [System.Environment]::SetEnvironmentVariable("GENEVA_LOGS_MULTI_TENANCY", $genevaLogsMultitenancy, "machine") + Write-Host "Successfully set environment variable GENEVA_LOGS_MULTI_TENANCY - $($genevaLogsMultitenancy) for target 'machine'..." 
} } else { @@ -513,7 +516,8 @@ function Read-Configs { Generate-GenevaTenantNameSpaceConfig Generate-GenevaInfraNameSpaceConfig } - } else { + } + else { $isAADMSIAuth = [System.Environment]::GetEnvironmentVariable("USING_AAD_MSI_AUTH", "process") if (![string]::IsNullOrEmpty($isAADMSIAuth) -and $isAADMSIAuth.ToLower() -eq 'true') { Set-CommonAMAEnvironmentVariables @@ -549,16 +553,16 @@ function Set-EnvironmentVariablesFromFile { } function Set-AgentConfigSchemaVersion { - #set agent config schema version - $schemaVersionFile = '/etc/config/settings/schema-version' - if (Test-Path $schemaVersionFile) { - $schemaVersion = Get-Content $schemaVersionFile | ForEach-Object { $_.TrimEnd() } - if ($schemaVersion.GetType().Name -eq 'String') { - [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_SCHEMA_VERSION", $schemaVersion, "Process") - [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_SCHEMA_VERSION", $schemaVersion, "Machine") - } - $env:AZMON_AGENT_CFG_SCHEMA_VERSION - } + #set agent config schema version + $schemaVersionFile = '/etc/config/settings/schema-version' + if (Test-Path $schemaVersionFile) { + $schemaVersion = Get-Content $schemaVersionFile | ForEach-Object { $_.TrimEnd() } + if ($schemaVersion.GetType().Name -eq 'String') { + [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_SCHEMA_VERSION", $schemaVersion, "Process") + [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_SCHEMA_VERSION", $schemaVersion, "Machine") + } + $env:AZMON_AGENT_CFG_SCHEMA_VERSION + } } function Get-ContainerRuntime { # containerd is the default runtime on AKS windows @@ -672,6 +676,37 @@ function Get-ContainerRuntime { return $containerRuntime } +function Disable-CustomMetrics-Config { + param ( + [string]$filePath + ) + $content = Get-Content -Path $filePath + + $inCustomMetricsBlock = $false + $customMetricsStart = "#CustomMetricsStart" + $customMetricsEnd = "#CustomMetricsEnd" + + $updatedContent = $content | ForEach-Object { + if 
($_ -eq $customMetricsStart) { + $inCustomMetricsBlock = $true + } + + if ($inCustomMetricsBlock) { + $_ = "# " + $_ + } + + if ($_ -eq "# " + $customMetricsEnd) { + $inCustomMetricsBlock = $false + } + + $_ + } + + $updatedContent | Set-Content -Path $filePath + + Write-Output "Successfully commented the custom metrics block." +} + function Start-Fluent-Telegraf { Set-ProcessAndMachineEnvVariables "TELEMETRY_CUSTOM_PROM_MONITOR_PODS" "false" @@ -705,7 +740,8 @@ function Start-Fluent-Telegraf { # Run fluent-bit service first so that we do not miss any logs being forwarded by the telegraf service. # Run fluent-bit as a background job. Switch this to a windows service once fluent-bit supports natively running as a windows service Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\fluent-bit\bin\fluent-bit.exe" -ArgumentList @("-c", "C:/etc/fluent-bit/fluent-bit-geneva.conf", "-e", "C:\opt\amalogswindows\out_oms.so") } - } else { + } + else { $fluentbitConfFile = "C:/etc/fluent-bit/fluent-bit.conf" Write-Host "Using fluent-bit config: $($fluentbitConfFile)" # Run fluent-bit service first so that we do not miss any logs being forwarded by the telegraf service. 
@@ -719,7 +755,17 @@ function Start-Fluent-Telegraf { Start-Telegraf } - fluentd --reg-winsvc i --reg-winsvc-auto-start --winsvc-name fluentdwinaks --reg-winsvc-fluentdopt '-c C:/etc/fluent/fluent.conf -o C:/etc/fluent/fluent.log' + $enableCustomMetrics = [System.Environment]::GetEnvironmentVariable("ENABLE_CUSTOM_METRICS", "process") + if ([string]::IsNullOrEmpty($enableCustomMetrics) -or $enableCustomMetrics.ToLower() -ne 'true') { + Disable-CustomMetrics-Config -filePath 'C:/etc/fluent/fluent.conf' + } + + $isAADMSIAuth = [System.Environment]::GetEnvironmentVariable("USING_AAD_MSI_AUTH") + + # Start fluentd as a windows service only if custom metrics is enabled or legacy mode + if ($enableCustomMetrics.ToLower() -eq 'true' -or [string]::IsNullOrEmpty($isAADMSIAuth) -or $isAADMSIAuth.ToLower() -eq "false") { + fluentd --reg-winsvc i --reg-winsvc-auto-start --winsvc-name fluentdwinaks --reg-winsvc-fluentdopt '-c C:/etc/fluent/fluent.conf -o C:/etc/fluent/fluent.log' + } Notepad.exe | Out-Null } @@ -802,7 +848,8 @@ function Start-Telegraf { C:\opt\telegraf\telegraf.exe --service start Get-Service telegraf } - } else { + } + else { Write-Host "Telegraf not started since Fluentbit tcp listener is not up and running on port 25229" } } @@ -852,24 +899,24 @@ function Bootstrap-CACertificates { } function IsGenevaMode() { - $isGenevaLogsIntegration=$false - $isGenevaLogsMultitenancy=$false + $isGenevaLogsIntegration = $false + $isGenevaLogsMultitenancy = $false $genevaLogsIntegration = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_INTEGRATION") $genevaLogsMultitenancy = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_MULTI_TENANCY") $genevaLogsInfraNameSpaces = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_INFRA_NAMESPACES") - $isGenevaLogsInfraNameSpacesEmpty=$true + $isGenevaLogsInfraNameSpacesEmpty = $true if (![string]::IsNullOrEmpty($genevaLogsIntegration) -and $genevaLogsIntegration.ToLower() -eq 'true') { - 
$isGenevaLogsIntegration=$true + $isGenevaLogsIntegration = $true } if (![string]::IsNullOrEmpty($genevaLogsMultitenancy) -and $genevaLogsMultitenancy.ToLower() -eq 'true') { - $isGenevaLogsMultitenancy=$true + $isGenevaLogsMultitenancy = $true } if (![string]::IsNullOrEmpty($genevaLogsInfraNameSpaces)) { - $isGenevaLogsInfraNameSpacesEmpty=$false + $isGenevaLogsInfraNameSpacesEmpty = $false } - if ($isGenevaLogsIntegration -and (!$isGenevaLogsMultitenancy -or !$isGenevaLogsInfraNameSpacesEmpty)){ - return $true + if ($isGenevaLogsIntegration -and (!$isGenevaLogsMultitenancy -or !$isGenevaLogsInfraNameSpacesEmpty)) { + return $true } return $false } @@ -898,7 +945,7 @@ $isGenevaModeVar = IsGenevaMode if ($isGenevaModeVar) { Write-Host "Starting Windows AMA in 1P Mode" #start Windows AMA - Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\windowsazuremonitoragent\windowsazuremonitoragent\Monitoring\Agent\MonAgentLauncher.exe" -ArgumentList @("-useenv")} + Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\windowsazuremonitoragent\windowsazuremonitoragent\Monitoring\Agent\MonAgentLauncher.exe" -ArgumentList @("-useenv") } if (![string]::IsNullOrEmpty($isAADMSIAuth) -and $isAADMSIAuth.ToLower() -eq 'true') { Write-Host "skipping agent onboarding via cert since AAD MSI Auth configured" } @@ -913,7 +960,7 @@ else { Write-Host "skipping agent onboarding via cert since AAD MSI Auth configured" #start Windows AMA - Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\windowsazuremonitoragent\windowsazuremonitoragent\Monitoring\Agent\MonAgentLauncher.exe" -ArgumentList @("-useenv")} + Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\windowsazuremonitoragent\windowsazuremonitoragent\Monitoring\Agent\MonAgentLauncher.exe" -ArgumentList @("-useenv") } $version = Get-Content -Path "C:\opt\windowsazuremonitoragent\version.txt" Write-Host $version } From 8ca95c2d0d5518c4948ff44ea233e7fc0f80da0a Mon Sep 
17 00:00:00 2001 From: Amol Agrawal Date: Tue, 21 May 2024 18:31:20 +0000 Subject: [PATCH 04/15] fix bugs --- build/windows/installer/livenessprobe/livenessprobe.cpp | 9 ++++++--- source/plugins/go/src/telemetry.go | 5 +++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/build/windows/installer/livenessprobe/livenessprobe.cpp b/build/windows/installer/livenessprobe/livenessprobe.cpp index 614d818af..ca2083e3a 100644 --- a/build/windows/installer/livenessprobe/livenessprobe.cpp +++ b/build/windows/installer/livenessprobe/livenessprobe.cpp @@ -113,10 +113,13 @@ int _tmain(int argc, wchar_t *argv[]) wprintf_s(L"ERROR:Process:%s is not running\n", argv[1]); return NO_FLUENT_BIT_PROCESS; } + const DWORD bufferSize = 256; + wchar_t enableCustomMetricsValue[bufferSize]; + wchar_t msiModeValue[bufferSize]; + GetEnvironmentVariable(L"ENABLE_CUSTOM_METRICS", enableCustomMetricsValue, bufferSize); + GetEnvironmentVariable(L"USING_AAD_MSI_AUTH", msiModeValue, bufferSize); - DWORD enableCustomMetrics = GetEnvironmentVariable(L"ENABLE_CUSTOM_METRICS", nullptr, 0); - DWORD msiMode = GetEnvironmentVariable(L"USING_AAD_MSI_AUTH", nullptr, 0); - if (enableCustomMetrics == "true" || msiMode != "true") + if (_wcsicmp(enableCustomMetricsValue, L"true") == 0 || _wcsicmp(msiModeValue, L"true") != 0) { DWORD dwStatus = GetServiceStatus(argv[2]); if (dwStatus != SERVICE_RUNNING) diff --git a/source/plugins/go/src/telemetry.go b/source/plugins/go/src/telemetry.go index 22d9eb142..4b86a707d 100644 --- a/source/plugins/go/src/telemetry.go +++ b/source/plugins/go/src/telemetry.go @@ -269,6 +269,11 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { telemetryDimensions["isHighLogScaleMode"] = isHighLogScaleMode } + enableCustomMetrics := os.Getenv("ENABLE_CUSTOM_METRICS") + if enableCustomMetrics != "" { + telemetryDimensions["enableCustomMetrics"] = enableCustomMetrics + } + telemetryDimensions["PromFbitChunkSize"] = os.Getenv("AZMON_FBIT_CHUNK_SIZE") 
telemetryDimensions["PromFbitBufferSize"] = os.Getenv("AZMON_FBIT_BUFFER_SIZE") telemetryDimensions["PromFbitMemBufLimit"] = os.Getenv("AZMON_FBIT_MEM_BUF_LIMIT") From c10a7e62ace4c9f1dde75c2a04c657af37597dfa Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Wed, 22 May 2024 01:13:04 +0000 Subject: [PATCH 05/15] change name --- .../installer/scripts/tomlparser-common-agent-config.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/common/installer/scripts/tomlparser-common-agent-config.rb b/build/common/installer/scripts/tomlparser-common-agent-config.rb index 7eb8c3c19..46b6d8880 100644 --- a/build/common/installer/scripts/tomlparser-common-agent-config.rb +++ b/build/common/installer/scripts/tomlparser-common-agent-config.rb @@ -83,8 +83,8 @@ def populateSettingValuesFromConfigMap(parsedConfig) end if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? - enable_custom_metrics = parsedConfig[:agent_settings][:enable_custom_metrics] - if !enable_custom_metrics.nil? && !enable_custom_metrics[:enabled].nil? + custom_metrics = parsedConfig[:agent_settings][:custom_metrics] + if !custom_metrics.nil? && !custom_metrics[:enabled].nil? 
@enableCustomMetrics = enable_custom_metrics[:enabled] puts "Using config map value: enabled = #{@enableCustomMetrics} for custom metrics" end From bee02a526bddd6c1c973b32396132ec55f61450c Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Wed, 22 May 2024 18:02:12 +0000 Subject: [PATCH 06/15] remove markers --- build/linux/installer/conf/container.conf | 6 ------ build/linux/installer/conf/kube.conf | 11 +---------- build/windows/installer/conf/fluent.conf | 2 -- 3 files changed, 1 insertion(+), 18 deletions(-) diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf index 149634836..5f987b0a5 100644 --- a/build/linux/installer/conf/container.conf +++ b/build/linux/installer/conf/container.conf @@ -8,7 +8,6 @@ bind 127.0.0.1 -#CustomMetricsStart # MDM metrics from telegraf @type tcp @@ -17,7 +16,6 @@ port 25228 format json -#CustomMetricsEnd # Container inventory @@ -35,7 +33,6 @@ @log_level info -#CustomMetricsStart #custom_metrics_mdm filter plugin @type cadvisor2mdm @@ -47,7 +44,6 @@ @type telegraf2mdm @log_level info -#CustomMetricsEnd #containerinventory @@ -101,7 +97,6 @@ keepalive true -#CustomMetricsStart @type mdm @log_level info @@ -118,7 +113,6 @@ retry_mdm_post_wait_minutes 30 -#CustomMetricsEnd #InsightsMetrics diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf index 33c6b5106..5fbf1a378 100644 --- a/build/linux/installer/conf/kube.conf +++ b/build/linux/installer/conf/kube.conf @@ -59,14 +59,13 @@ keepalive true -#CustomMetricsStart #custom_metrics_mdm filter plugin for perf data from windows nodes @type cadvisor2mdm metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes @log_level info -#CustomMetricsEnd + #Kubernetes pod inventory @@ -164,12 +163,10 @@ keepalive true -#CustomMetricsStart @type inventory2mdm @log_level info -#CustomMetricsEnd #kubenodeinventory @@ -197,7 +194,6 @@ keepalive true -#CustomMetricsStart @type mdm @id out_mdm_nodeinventory @@ 
-216,7 +212,6 @@ retry_mdm_post_wait_minutes 30 -#CustomMetricsEnd #Kubernetes events @@ -253,7 +248,6 @@ keepalive true -#CustomMetricsStart #Kubernetes podmdm inventory @@ -281,7 +275,6 @@ retry_mdm_post_wait_minutes 30 -#CustomMetricsEnd #Kubernetes perf inventory @@ -344,7 +337,6 @@ keepalive true -#CustomMetricsStart @type mdm @id out_mdm_perf @@ -363,5 +355,4 @@ retry_mdm_post_wait_minutes 30 -#CustomMetricsEnd diff --git a/build/windows/installer/conf/fluent.conf b/build/windows/installer/conf/fluent.conf index 99cf3e24f..b3e423265 100644 --- a/build/windows/installer/conf/fluent.conf +++ b/build/windows/installer/conf/fluent.conf @@ -4,7 +4,6 @@ @log_level info -#CustomMetricsStart @type cadvisor_perf tag oms.api.cadvisorperf @@ -36,4 +35,3 @@ retry_mdm_post_wait_minutes 30 -#CustomMetricsEnd \ No newline at end of file From 784dc760a2e442adac22c2996b66b5e124f45ec0 Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Wed, 22 May 2024 18:22:04 +0000 Subject: [PATCH 07/15] improve custom metrics deprecation logic --- build/linux/installer/conf/container-cm.conf | 141 +++++++ build/linux/installer/conf/container.conf | 38 -- build/linux/installer/conf/kube-cm.conf | 358 ++++++++++++++++++ build/linux/installer/conf/kube.conf | 77 ---- .../installer/datafiles/base_container.data | 2 + build/windows/installer/conf/fluent-cm.conf | 37 ++ build/windows/installer/conf/fluent.conf | 32 -- kubernetes/linux/main.sh | 8 +- kubernetes/windows/Dockerfile | 1 + kubernetes/windows/Dockerfile-dev-image | 1 + kubernetes/windows/main.ps1 | 5 +- 11 files changed, 546 insertions(+), 154 deletions(-) create mode 100644 build/linux/installer/conf/container-cm.conf create mode 100644 build/linux/installer/conf/kube-cm.conf create mode 100644 build/windows/installer/conf/fluent-cm.conf diff --git a/build/linux/installer/conf/container-cm.conf b/build/linux/installer/conf/container-cm.conf new file mode 100644 index 000000000..5f987b0a5 --- /dev/null +++ 
b/build/linux/installer/conf/container-cm.conf @@ -0,0 +1,141 @@ + # Fluentd config file for OMS Docker - container components (non kubeAPI) + + # Forward port 25225 for container logs + # gangams - not used and get ridoff after confirming safe to remove + + @type forward + port 25225 + bind 127.0.0.1 + + + # MDM metrics from telegraf + + @type tcp + tag oms.mdm.container.perf.telegraf.* + bind 0.0.0.0 + port 25228 + format json + + + # Container inventory + + @type containerinventory + tag oneagent.containerInsights.CONTAINER_INVENTORY_BLOB + run_interval 60 + @log_level info + + + #cadvisor perf + + @type cadvisor_perf + tag oneagent.containerInsights.LINUX_PERF_BLOB + run_interval 60 + @log_level info + + + #custom_metrics_mdm filter plugin + + @type cadvisor2mdm + metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes,pvUsedBytes + @log_level info + + + + @type telegraf2mdm + @log_level info + + + #containerinventory + + @type forward + @log_level info + send_timeout 30 + connect_timeout 30 + heartbeat_type none + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + + @type file + path /var/opt/microsoft/docker-cimprov/state/containerinventory*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length 20 + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + keepalive true + + + #cadvisorperf + + @type forward + @log_level info + send_timeout 30 + connect_timeout 30 + heartbeat_type none + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + + @type file + path /var/opt/microsoft/docker-cimprov/state/cadvisorperf*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length 20 + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + keepalive true + + + + @type mdm + @log_level info + + @type file + path 
/var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + retry_mdm_post_wait_minutes 30 + + + #InsightsMetrics + + @type forward + @log_level info + send_timeout 30 + connect_timeout 30 + heartbeat_type none + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + + @type file + path /var/opt/microsoft/docker-cimprov/state/insightsmetrics*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length 20 + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + keepalive true + diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf index 5f987b0a5..d500f5d45 100644 --- a/build/linux/installer/conf/container.conf +++ b/build/linux/installer/conf/container.conf @@ -8,15 +8,6 @@ bind 127.0.0.1 - # MDM metrics from telegraf - - @type tcp - tag oms.mdm.container.perf.telegraf.* - bind 0.0.0.0 - port 25228 - format json - - # Container inventory @type containerinventory @@ -33,18 +24,6 @@ @log_level info - #custom_metrics_mdm filter plugin - - @type cadvisor2mdm - metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes,pvUsedBytes - @log_level info - - - - @type telegraf2mdm - @log_level info - - #containerinventory @type forward @@ -97,23 +76,6 @@ keepalive true - - @type mdm - @log_level info - - @type file - path /var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer - overflow_action drop_oldest_chunk - chunk_limit_size 4m - flush_interval 20s - retry_max_times 10 - retry_wait 5s - retry_max_interval 5m - flush_thread_count 5 - - retry_mdm_post_wait_minutes 30 - - #InsightsMetrics @type forward diff --git a/build/linux/installer/conf/kube-cm.conf b/build/linux/installer/conf/kube-cm.conf new file mode 100644 index 000000000..5fbf1a378 --- 
/dev/null +++ b/build/linux/installer/conf/kube-cm.conf @@ -0,0 +1,358 @@ + #fluent forward plugin + + workers "#{ENV['NUM_OF_FLUENTD_WORKERS']}" + root_dir /var/opt/microsoft/docker-cimprov/state + + + #@include windows_rs_containerinventory.conf + + #perf + + @type forward + @id out_perf_fwd + @log_level info + send_timeout 30 + connect_timeout 30 + heartbeat_type none + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + + @type file + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length "#{ENV['FLUENTD_KUBE_PERF_QUEUE_LIMIT_LENGTH']}" + flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + keepalive true + + + #InsightsMetrics + + @type forward + @id out_insights_metrics_fwd + @log_level info + send_timeout 30 + connect_timeout 30 + heartbeat_type none + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + + @type file + path /var/opt/microsoft/docker-cimprov/state/insightsmetrics*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" + flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + keepalive true + + + #custom_metrics_mdm filter plugin for perf data from windows nodes + + @type cadvisor2mdm + metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes + @log_level info + + + + + #Kubernetes pod inventory + + @type kube_podinventory + tag oneagent.containerInsights.KUBE_POD_INVENTORY_BLOB + run_interval 60 + @log_level info + + + #kubepodinventory + + @type forward + @log_level info + send_timeout 30 + connect_timeout 30 + heartbeat_type none + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + + @type file + path /var/opt/microsoft/docker-cimprov/state/kubepod*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length 
"#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" + flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + keepalive true + + + #kubeservices + + @type forward + @log_level info + send_timeout 30 + connect_timeout 30 + heartbeat_type none + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + + @type file + path /var/opt/microsoft/docker-cimprov/state/kubeservices*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" + flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 2 + + keepalive true + + + + #Kubernetes Nodes + + @type kube_nodes + tag oneagent.containerInsights.KUBE_NODE_INVENTORY_BLOB + run_interval 60 + @log_level info + + + #containernodeinventory + + @type forward + @log_level info + send_timeout 30 + connect_timeout 30 + heartbeat_type none + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + + @type file + path /var/opt/microsoft/docker-cimprov/state/containernodeinventory*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" + flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 3 + + keepalive true + + + + @type inventory2mdm + @log_level info + + + #kubenodeinventory + + @type forward + @log_level info + send_timeout 30 + connect_timeout 30 + heartbeat_type none + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + + @type file + path /var/opt/microsoft/docker-cimprov/state/kubenode*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" + flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + 
keepalive true + + + + @type mdm + @id out_mdm_nodeinventory + @log_level info + + @type file + path /var/opt/microsoft/docker-cimprov/state/out_mdm_nodeinventory*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" + flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + retry_mdm_post_wait_minutes 30 + + + + #Kubernetes events + + @type kube_events + tag oneagent.containerInsights.KUBE_EVENTS_BLOB + run_interval 60 + @log_level info + + + #kubeevents + + @type forward + @log_level info + send_timeout 30 + connect_timeout 30 + heartbeat_type none + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + + @type file + path /var/opt/microsoft/docker-cimprov/state/kubeevents*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" + flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + keepalive true + + + + #Kubernetes podmdm inventory + + @type kube_podmdminventory + run_interval 60 + @log_level info + + + + @type mdm + @id out_mdm_podinventory + @log_level info + + @type file + path /var/opt/microsoft/docker-cimprov/state/out_mdm_podinventory*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" + flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count "#{ENV['FLUENTD_MDM_FLUSH_THREAD_COUNT']}" + + retry_mdm_post_wait_minutes 30 + + + + + #Kubernetes perf inventory + + @type kube_perfinventory + tag oneagent.containerInsights.LINUX_PERF_BLOB + run_interval 60 + @log_level info + + + #Kubernetes Persistent Volume inventory + + @type kube_pvinventory + tag oneagent.containerInsights.KUBE_PV_INVENTORY_BLOB + 
run_interval 60 + @log_level info + + + #@include windows_rs_perf.conf + + #Kubernetes object state - deployments + + @type kubestate_deployments + tag oneagent.containerInsights.INSIGHTS_METRICS_BLOB + run_interval 60 + @log_level info + + + #Kubernetes object state - HPA + + @type kubestate_hpa + tag oneagent.containerInsights.INSIGHTS_METRICS_BLOB + run_interval 60 + @log_level info + + + #kubepvinventory + + @type forward + @log_level info + send_timeout 30 + connect_timeout 30 + heartbeat_type none + + host 0.0.0.0 + port "#{ENV['MDSD_FLUENT_SOCKET_PORT']}" + + + @type file + path /var/opt/microsoft/docker-cimprov/state/kubepv*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" + flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + keepalive true + + + + @type mdm + @id out_mdm_perf + @log_level info + + @type file + path /var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" + flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + retry_mdm_post_wait_minutes 30 + + diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf index 5fbf1a378..4c81a7625 100644 --- a/build/linux/installer/conf/kube.conf +++ b/build/linux/installer/conf/kube.conf @@ -59,14 +59,6 @@ keepalive true - #custom_metrics_mdm filter plugin for perf data from windows nodes - - @type cadvisor2mdm - metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes - @log_level info - - - #Kubernetes pod inventory @@ -163,11 +155,6 @@ keepalive true - - @type inventory2mdm - @log_level info - - #kubenodeinventory @type forward @@ -194,24 +181,6 @@ keepalive true - - @type mdm - @id 
out_mdm_nodeinventory - @log_level info - - @type file - path /var/opt/microsoft/docker-cimprov/state/out_mdm_nodeinventory*.buffer - overflow_action drop_oldest_chunk - chunk_limit_size 4m - queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" - flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" - retry_max_times 10 - retry_wait 5s - retry_max_interval 5m - flush_thread_count 5 - - retry_mdm_post_wait_minutes 30 - #Kubernetes events @@ -248,33 +217,6 @@ keepalive true - - #Kubernetes podmdm inventory - - @type kube_podmdminventory - run_interval 60 - @log_level info - - - - @type mdm - @id out_mdm_podinventory - @log_level info - - @type file - path /var/opt/microsoft/docker-cimprov/state/out_mdm_podinventory*.buffer - overflow_action drop_oldest_chunk - chunk_limit_size 4m - queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" - flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" - retry_max_times 10 - retry_wait 5s - retry_max_interval 5m - flush_thread_count "#{ENV['FLUENTD_MDM_FLUSH_THREAD_COUNT']}" - - retry_mdm_post_wait_minutes 30 - - #Kubernetes perf inventory @@ -336,23 +278,4 @@ keepalive true - - - @type mdm - @id out_mdm_perf - @log_level info - - @type file - path /var/opt/microsoft/docker-cimprov/state/out_mdm_cdvisorperf*.buffer - overflow_action drop_oldest_chunk - chunk_limit_size 4m - queue_limit_length "#{ENV['FLUENTD_QUEUE_LIMIT_LENGTH']}" - flush_interval "#{ENV['FLUENTD_FLUSH_INTERVAL']}" - retry_max_times 10 - retry_wait 5s - retry_max_interval 5m - flush_thread_count 5 - - retry_mdm_post_wait_minutes 30 - diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data index ef93a1b4e..1cd6b6b90 100644 --- a/build/linux/installer/datafiles/base_container.data +++ b/build/linux/installer/datafiles/base_container.data @@ -132,7 +132,9 @@ MAINTAINER: 'Microsoft Corporation' /etc/fluent/plugin/extension_utils.rb; source/plugins/ruby/extension_utils.rb; 644; root; root 
/etc/fluent/kube.conf; build/linux/installer/conf/kube.conf; 644; root; root +/etc/fluent/kube-cm.conf; build/linux/installer/conf/kube-cm.conf; 644; root; root /etc/fluent/container.conf; build/linux/installer/conf/container.conf; 644; root; root +/etc/fluent/container-cm.conf; build/linux/installer/conf/container-cm.conf; 644; root; root /etc/fluent/windows_rs_containerinventory.conf; build/linux/installer/conf/windows_rs_containerinventory.conf; 644; root; root /etc/fluent/windows_rs_perf.conf; build/linux/installer/conf/windows_rs_perf.conf; 644; root; root diff --git a/build/windows/installer/conf/fluent-cm.conf b/build/windows/installer/conf/fluent-cm.conf new file mode 100644 index 000000000..b3e423265 --- /dev/null +++ b/build/windows/installer/conf/fluent-cm.conf @@ -0,0 +1,37 @@ + + @type heartbeat_request + run_interval 30m + @log_level info + + + + @type cadvisor_perf + tag oms.api.cadvisorperf + run_interval 60 + @log_level info + + +#custom_metrics_mdm filter plugin + + @type cadvisor2mdm + metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes + log_path /etc/amalogswindows/filter_cadvisor2mdm.log + @log_level info + + + + @type mdm + @log_level info + + @type file + path /etc/amalogswindows/out_mdm_cdvisorperf.buffer + overflow_action drop_oldest_chunk + chunk_limit_size 4m + flush_interval 20s + retry_max_times 10 + retry_wait 5s + retry_max_interval 5m + flush_thread_count 5 + + retry_mdm_post_wait_minutes 30 + diff --git a/build/windows/installer/conf/fluent.conf b/build/windows/installer/conf/fluent.conf index b3e423265..52d5db97b 100644 --- a/build/windows/installer/conf/fluent.conf +++ b/build/windows/installer/conf/fluent.conf @@ -3,35 +3,3 @@ run_interval 30m @log_level info - - - @type cadvisor_perf - tag oms.api.cadvisorperf - run_interval 60 - @log_level info - - -#custom_metrics_mdm filter plugin - - @type cadvisor2mdm - metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes - log_path /etc/amalogswindows/filter_cadvisor2mdm.log 
- @log_level info - - - - @type mdm - @log_level info - - @type file - path /etc/amalogswindows/out_mdm_cdvisorperf.buffer - overflow_action drop_oldest_chunk - chunk_limit_size 4m - flush_interval 20s - retry_max_times 10 - retry_wait 5s - retry_max_interval 5m - flush_thread_count 5 - - retry_mdm_post_wait_minutes 30 - diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index b5969b3ab..ca88a7cb2 100644 --- a/kubernetes/linux/main.sh +++ b/kubernetes/linux/main.sh @@ -1080,8 +1080,8 @@ if [ "$AZMON_RESOURCE_OPTIMIZATION_ENABLED" != "true" ]; then if [ ! -e "/etc/config/kube.conf" ]; then if [ "$LOGS_AND_EVENTS_ONLY" != "true" ]; then echo "*** starting fluentd v1 in daemonset" - if [ "${ENABLE_CUSTOM_METRICS}" != "true" ]; then - sed -i '/^#CustomMetricsStart/,/^#CustomMetricsEnd/ s/^/# /' /etc/fluent/container.conf + if [ "${ENABLE_CUSTOM_METRICS}" == "true" ]; then + mv /etc/fluent/container-cm.conf /etc/fluent/container.conf fi fluentd -c /etc/fluent/container.conf -o /var/opt/microsoft/docker-cimprov/log/fluentd.log --log-rotate-age 5 --log-rotate-size 20971520 & else @@ -1089,8 +1089,8 @@ if [ "$AZMON_RESOURCE_OPTIMIZATION_ENABLED" != "true" ]; then fi else echo "*** starting fluentd v1 in replicaset" - if [ "${ENABLE_CUSTOM_METRICS}" != "true" ]; then - sed -i '/^#CustomMetricsStart/,/^#CustomMetricsEnd/ s/^/# /' /etc/fluent/kube.conf + if [ "${ENABLE_CUSTOM_METRICS}" == "true" ]; then + mv /etc/fluent/kube-cm.conf /etc/fluent/kube.conf fi fluentd -c /etc/fluent/kube.conf -o /var/opt/microsoft/docker-cimprov/log/fluentd.log --log-rotate-age 5 --log-rotate-size 20971520 & fi diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile index c81ac6b95..8522a337a 100644 --- a/kubernetes/windows/Dockerfile +++ b/kubernetes/windows/Dockerfile @@ -68,6 +68,7 @@ COPY ./amalogswindows/perf.so /opt/fluent-bit/bin/perf.so # copy fluent, fluent-bit and out_oms conf files COPY ./amalogswindows/installer/conf/fluent.conf /etc/fluent/ +COPY 
./amalogswindows/installer/conf/fluent-cm.conf /etc/fluent/ COPY ./amalogswindows/installer/conf/fluent-bit.conf /etc/fluent-bit COPY ./amalogswindows/installer/conf/azm-containers-parser.conf /etc/fluent-bit/ COPY ./amalogswindows/installer/conf/azm-containers-parser-multiline.conf /etc/fluent-bit/ diff --git a/kubernetes/windows/Dockerfile-dev-image b/kubernetes/windows/Dockerfile-dev-image index 99fc5f699..c6e2cad32 100644 --- a/kubernetes/windows/Dockerfile-dev-image +++ b/kubernetes/windows/Dockerfile-dev-image @@ -19,6 +19,7 @@ COPY ./amalogswindows/out_oms.so /opt/amalogswindows/out_oms.so # copy fluent, fluent-bit and out_oms conf files COPY ./amalogswindows/installer/conf/fluent.conf /etc/fluent/ +COPY ./amalogswindows/installer/conf/fluent-cm.conf /etc/fluent/ COPY ./amalogswindows/installer/conf/fluent-bit.conf /etc/fluent-bit COPY ./amalogswindows/installer/conf/azm-containers-parser.conf /etc/fluent-bit/ COPY ./amalogswindows/installer/conf/azm-containers-parser-multiline.conf /etc/fluent-bit/ diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1 index 9e2a4c5ff..706ed62ec 100644 --- a/kubernetes/windows/main.ps1 +++ b/kubernetes/windows/main.ps1 @@ -756,12 +756,11 @@ function Start-Fluent-Telegraf { } $enableCustomMetrics = [System.Environment]::GetEnvironmentVariable("ENABLE_CUSTOM_METRICS", "process") - if ([string]::IsNullOrEmpty($enableCustomMetrics) -or $enableCustomMetrics.ToLower() -ne 'true') { - Disable-CustomMetrics-Config -filePath 'C:/etc/fluent/fluent.conf' + if ($enableCustomMetrics.ToLower() -eq 'true') { + Move-Item -Path "C:/etc/fluent/fluent-cm.conf" -Destination "C:/etc/fluent/fluent.conf" -Force } $isAADMSIAuth = [System.Environment]::GetEnvironmentVariable("USING_AAD_MSI_AUTH") - # Start fluentd as a windows service only if custom metrics is enabled or legacy mode if ($enableCustomMetrics.ToLower() -eq 'true' -or [string]::IsNullOrEmpty($isAADMSIAuth) -or $isAADMSIAuth.ToLower() -eq "false") { fluentd --reg-winsvc 
i --reg-winsvc-auto-start --winsvc-name fluentdwinaks --reg-winsvc-fluentdopt '-c C:/etc/fluent/fluent.conf -o C:/etc/fluent/fluent.log' From 60be9ec6e0d1e195cf979af29b829395a017f942 Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Wed, 22 May 2024 18:24:41 +0000 Subject: [PATCH 08/15] remove unnecessary function --- kubernetes/windows/main.ps1 | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1 index 706ed62ec..dafdfaf32 100644 --- a/kubernetes/windows/main.ps1 +++ b/kubernetes/windows/main.ps1 @@ -676,37 +676,6 @@ function Get-ContainerRuntime { return $containerRuntime } -function Disable-CustomMetrics-Config { - param ( - [string]$filePath - ) - $content = Get-Content -Path $filePath - - $inCustomMetricsBlock = $false - $customMetricsStart = "#CustomMetricsStart" - $customMetricsEnd = "#CustomMetricsEnd" - - $updatedContent = $content | ForEach-Object { - if ($_ -eq $customMetricsStart) { - $inCustomMetricsBlock = $true - } - - if ($inCustomMetricsBlock) { - $_ = "# " + $_ - } - - if ($_ -eq "# " + $customMetricsEnd) { - $inCustomMetricsBlock = $false - } - - $_ - } - - $updatedContent | Set-Content -Path $filePath - - Write-Output "Successfully commented the custom metrics block." 
-} - function Start-Fluent-Telegraf { Set-ProcessAndMachineEnvVariables "TELEMETRY_CUSTOM_PROM_MONITOR_PODS" "false" From 6fe6ee7b451091096058c8cfeaf804834030d587 Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Wed, 22 May 2024 21:36:32 +0000 Subject: [PATCH 09/15] update win scripts --- build/windows/installer/livenessprobe/livenessprobe.cpp | 2 +- kubernetes/windows/main.ps1 | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/build/windows/installer/livenessprobe/livenessprobe.cpp b/build/windows/installer/livenessprobe/livenessprobe.cpp index ca2083e3a..86dbd8561 100644 --- a/build/windows/installer/livenessprobe/livenessprobe.cpp +++ b/build/windows/installer/livenessprobe/livenessprobe.cpp @@ -113,7 +113,7 @@ int _tmain(int argc, wchar_t *argv[]) wprintf_s(L"ERROR:Process:%s is not running\n", argv[1]); return NO_FLUENT_BIT_PROCESS; } - const DWORD bufferSize = 256; + const DWORD bufferSize = 16; wchar_t enableCustomMetricsValue[bufferSize]; wchar_t msiModeValue[bufferSize]; GetEnvironmentVariable(L"ENABLE_CUSTOM_METRICS", enableCustomMetricsValue, bufferSize); diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1 index dafdfaf32..f6b72018d 100644 --- a/kubernetes/windows/main.ps1 +++ b/kubernetes/windows/main.ps1 @@ -725,13 +725,13 @@ function Start-Fluent-Telegraf { } $enableCustomMetrics = [System.Environment]::GetEnvironmentVariable("ENABLE_CUSTOM_METRICS", "process") - if ($enableCustomMetrics.ToLower() -eq 'true') { + if (![string]::IsNullOrEmpty($enableCustomMetrics) -and $enableCustomMetrics.ToLower() -eq 'true') { Move-Item -Path "C:/etc/fluent/fluent-cm.conf" -Destination "C:/etc/fluent/fluent.conf" -Force } $isAADMSIAuth = [System.Environment]::GetEnvironmentVariable("USING_AAD_MSI_AUTH") # Start fluentd as a windows service only if custom metrics is enabled or legacy mode - if ($enableCustomMetrics.ToLower() -eq 'true' -or [string]::IsNullOrEmpty($isAADMSIAuth) -or $isAADMSIAuth.ToLower() -eq "false") { + if 
((![string]::IsNullOrEmpty($enableCustomMetrics) -and $enableCustomMetrics.ToLower() -eq 'true') -or [string]::IsNullOrEmpty($isAADMSIAuth) -or $isAADMSIAuth.ToLower() -eq "false") { fluentd --reg-winsvc i --reg-winsvc-auto-start --winsvc-name fluentdwinaks --reg-winsvc-fluentdopt '-c C:/etc/fluent/fluent.conf -o C:/etc/fluent/fluent.log' } From 5325d5e617cef4148becd898db854e0ec990ed13 Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Wed, 22 May 2024 21:38:45 +0000 Subject: [PATCH 10/15] fix bug --- .../common/installer/scripts/tomlparser-common-agent-config.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/common/installer/scripts/tomlparser-common-agent-config.rb b/build/common/installer/scripts/tomlparser-common-agent-config.rb index 46b6d8880..21fb15b28 100644 --- a/build/common/installer/scripts/tomlparser-common-agent-config.rb +++ b/build/common/installer/scripts/tomlparser-common-agent-config.rb @@ -85,7 +85,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? custom_metrics = parsedConfig[:agent_settings][:custom_metrics] if !custom_metrics.nil? && !custom_metrics[:enabled].nil? 
- @enableCustomMetrics = enable_custom_metrics[:enabled] + @enableCustomMetrics = custom_metrics[:enabled] puts "Using config map value: enabled = #{@enableCustomMetrics} for custom metrics" end end From 0457e706140f0ef8a3c8f9f29f1fbba7a58a2d19 Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Wed, 22 May 2024 22:45:10 +0000 Subject: [PATCH 11/15] remove extra changes --- kubernetes/windows/main.ps1 | 140 +++++++++++++++++------------------- 1 file changed, 67 insertions(+), 73 deletions(-) diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1 index f6b72018d..67b89f5ff 100644 --- a/kubernetes/windows/main.ps1 +++ b/kubernetes/windows/main.ps1 @@ -45,8 +45,7 @@ function Test-FluentbitTcpListener { [System.Environment]::SetEnvironmentVariable("WAITTIME_PORT_25229", $waitTimeSecs, "Process") [System.Environment]::SetEnvironmentVariable("WAITTIME_PORT_25229", $waitTimeSecs, "Machine") Write-Host "Successfully set environment variable WAITTIME_PORT_25229 - $($waitTimeSecs) for target 'machine'..." 
- } - else { + } else { Write-Host "Failed to set environment variable WAITTIME_PORT_25229 for target 'machine' since it is either null or empty" $waitTimeSecs = 30 } @@ -188,13 +187,13 @@ function Set-AMA3PEnvironmentVariables { } function Generate-GenevaTenantNameSpaceConfig { - $genevaLogsTenantNameSpaces = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_TENANT_NAMESPACES", "process") + $genevaLogsTenantNameSpaces = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_TENANT_NAMESPACES", "process") if (![string]::IsNullOrEmpty($genevaLogsTenantNameSpaces)) { [System.Environment]::SetEnvironmentVariable("GENEVA_LOGS_TENANT_NAMESPACES", $genevaLogsTenantNameSpaces, "machine") $genevaLogsTenantNameSpacesArray = $genevaLogsTenantNameSpaces.Split(",") for ($i = 0; $i -lt $genevaLogsTenantNameSpacesArray.Length; $i = $i + 1) { - $tenantName = $genevaLogsTenantNameSpacesArray[$i] - Copy-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_tenant.conf -Destination C:/etc/fluent-bit/fluent-bit-geneva-logs_$tenantName.conf + $tenantName = $genevaLogsTenantNameSpacesArray[$i] + Copy-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_tenant.conf -Destination C:/etc/fluent-bit/fluent-bit-geneva-logs_$tenantName.conf (Get-Content -Path C:/etc/fluent-bit/fluent-bit-geneva-logs_$tenantName.conf -Raw) -replace '', $tenantName | Set-Content C:/etc/fluent-bit/fluent-bit-geneva-logs_$tenantName.conf } } @@ -202,18 +201,18 @@ function Generate-GenevaTenantNameSpaceConfig { } function Generate-GenevaInfraNameSpaceConfig { - $genevaLogsInfraNameSpaces = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_INFRA_NAMESPACES", "process") - if (![string]::IsNullOrEmpty($genevaLogsInfraNameSpaces)) { - [System.Environment]::SetEnvironmentVariable("GENEVA_LOGS_INFRA_NAMESPACES", $genevaLogsInfraNameSpaces, "machine") - $genevaLogsInfraNameSpacesArray = $genevaLogsInfraNameSpaces.Split(",") - for ($i = 0; $i -lt $genevaLogsInfraNameSpacesArray.Length; $i = $i + 1) { - 
$infraNameSpaceName = $genevaLogsInfraNameSpacesArray[$i] - $infraNamespaceWithoutSuffix = $infraNameSpaceName.TrimEnd("_*") - Copy-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_infra.conf -Destination C:/etc/fluent-bit/fluent-bit-geneva-logs_$infraNamespaceWithoutSuffix.conf + $genevaLogsInfraNameSpaces = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_INFRA_NAMESPACES", "process") + if (![string]::IsNullOrEmpty($genevaLogsInfraNameSpaces)) { + [System.Environment]::SetEnvironmentVariable("GENEVA_LOGS_INFRA_NAMESPACES", $genevaLogsInfraNameSpaces, "machine") + $genevaLogsInfraNameSpacesArray = $genevaLogsInfraNameSpaces.Split(",") + for ($i = 0; $i -lt $genevaLogsInfraNameSpacesArray.Length; $i = $i + 1) { + $infraNameSpaceName = $genevaLogsInfraNameSpacesArray[$i] + $infraNamespaceWithoutSuffix = $infraNameSpaceName.TrimEnd("_*") + Copy-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_infra.conf -Destination C:/etc/fluent-bit/fluent-bit-geneva-logs_$infraNamespaceWithoutSuffix.conf (Get-Content -Path C:/etc/fluent-bit/fluent-bit-geneva-logs_$infraNamespaceWithoutSuffix.conf -Raw) -replace '', $infraNameSpaceName | Set-Content C:/etc/fluent-bit/fluent-bit-geneva-logs_$infraNamespaceWithoutSuffix.conf - } - } - Remove-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_infra.conf + } + } + Remove-Item C:/etc/fluent-bit/fluent-bit-geneva-logs_infra.conf } #register fluentd as a windows service @@ -292,40 +291,39 @@ function Set-EnvironmentVariables { } if (![string]::IsNullOrEmpty($isIgnoreProxySettings) -and $isIgnoreProxySettings.ToLower() -eq 'true') { Write-Host "Ignoring Proxy Setttings since IGNORE_PROXY_SETTINGS is - $($isIgnoreProxySettings)" - } - else { - $proxy = "" - if (Test-Path /etc/ama-logs-secret/PROXY) { - # TODO: Change to ama-logs-secret before merging - $proxy = Get-Content /etc/ama-logs-secret/PROXY - Write-Host "Validating the proxy configuration since proxy configuration provided" - # valide the proxy endpoint configuration + } else { + 
$proxy = "" + if (Test-Path /etc/ama-logs-secret/PROXY) { + # TODO: Change to ama-logs-secret before merging + $proxy = Get-Content /etc/ama-logs-secret/PROXY + Write-Host "Validating the proxy configuration since proxy configuration provided" + # valide the proxy endpoint configuration + if (![string]::IsNullOrEmpty($proxy)) { + $proxy = [string]$proxy.Trim(); if (![string]::IsNullOrEmpty($proxy)) { $proxy = [string]$proxy.Trim(); - if (![string]::IsNullOrEmpty($proxy)) { - $proxy = [string]$proxy.Trim(); - $parts = $proxy -split "@" - if ($parts.Length -ne 2) { - Write-Host "Proxy is not using credentials..." - } - $subparts1 = $parts[0] -split "//" - if ($subparts1.Length -ne 2) { - Write-Host "Invalid ProxyConfiguration. EXITING....." - exit 1 - } - $protocol = $subparts1[0].ToLower().TrimEnd(":") - if (!($protocol -eq "http") -and !($protocol -eq "https")) { - Write-Host "Unsupported protocol in ProxyConfiguration $($proxy). EXITING....." - exit 1 - } - + $parts = $proxy -split "@" + if ($parts.Length -ne 2) { + Write-Host "Proxy is not using credentials..." + } + $subparts1 = $parts[0] -split "//" + if ($subparts1.Length -ne 2) { + Write-Host "Invalid ProxyConfiguration. EXITING....." + exit 1 } + $protocol = $subparts1[0].ToLower().TrimEnd(":") + if (!($protocol -eq "http") -and !($protocol -eq "https")) { + Write-Host "Unsupported protocol in ProxyConfiguration $($proxy). EXITING....." 
+ exit 1 + } + } - Write-Host "Provided Proxy configuration is valid" } + Write-Host "Provided Proxy configuration is valid" + } - if (Test-Path /etc/ama-logs-secret/PROXYCERT.crt) { + if (Test-Path /etc/ama-logs-secret/PROXYCERT.crt) { Write-Host "Importing Proxy CA cert since Proxy CA cert configured" Import-Certificate -FilePath /etc/ama-logs-secret/PROXYCERT.crt -CertStoreLocation 'Cert:\LocalMachine\Root' -Verbose } @@ -489,16 +487,15 @@ function Read-Configs { if (![string]::IsNullOrEmpty($enableFbitInternalMetrics) -and $enableFbitInternalMetrics.ToLower() -eq 'true') { Write-Host "Fluent-bit Internal metrics configured" - } - else { + } else { Clear-Content C:/etc/fluent-bit/fluent-bit-internal-metrics.conf } $genevaLogsMultitenancy = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_MULTI_TENANCY", "process") if (![string]::IsNullOrEmpty($genevaLogsMultitenancy)) { if ($genevaLogsMultitenancy.ToLower() -eq 'true') { - [System.Environment]::SetEnvironmentVariable("GENEVA_LOGS_MULTI_TENANCY", $genevaLogsMultitenancy, "machine") - Write-Host "Successfully set environment variable GENEVA_LOGS_MULTI_TENANCY - $($genevaLogsMultitenancy) for target 'machine'..." + [System.Environment]::SetEnvironmentVariable("GENEVA_LOGS_MULTI_TENANCY", $genevaLogsMultitenancy, "machine") + Write-Host "Successfully set environment variable GENEVA_LOGS_MULTI_TENANCY - $($genevaLogsMultitenancy) for target 'machine'..." 
} } else { @@ -516,8 +513,7 @@ function Read-Configs { Generate-GenevaTenantNameSpaceConfig Generate-GenevaInfraNameSpaceConfig } - } - else { + } else { $isAADMSIAuth = [System.Environment]::GetEnvironmentVariable("USING_AAD_MSI_AUTH", "process") if (![string]::IsNullOrEmpty($isAADMSIAuth) -and $isAADMSIAuth.ToLower() -eq 'true') { Set-CommonAMAEnvironmentVariables @@ -553,16 +549,16 @@ function Set-EnvironmentVariablesFromFile { } function Set-AgentConfigSchemaVersion { - #set agent config schema version - $schemaVersionFile = '/etc/config/settings/schema-version' - if (Test-Path $schemaVersionFile) { - $schemaVersion = Get-Content $schemaVersionFile | ForEach-Object { $_.TrimEnd() } - if ($schemaVersion.GetType().Name -eq 'String') { - [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_SCHEMA_VERSION", $schemaVersion, "Process") - [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_SCHEMA_VERSION", $schemaVersion, "Machine") - } - $env:AZMON_AGENT_CFG_SCHEMA_VERSION - } + #set agent config schema version + $schemaVersionFile = '/etc/config/settings/schema-version' + if (Test-Path $schemaVersionFile) { + $schemaVersion = Get-Content $schemaVersionFile | ForEach-Object { $_.TrimEnd() } + if ($schemaVersion.GetType().Name -eq 'String') { + [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_SCHEMA_VERSION", $schemaVersion, "Process") + [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_SCHEMA_VERSION", $schemaVersion, "Machine") + } + $env:AZMON_AGENT_CFG_SCHEMA_VERSION + } } function Get-ContainerRuntime { # containerd is the default runtime on AKS windows @@ -709,8 +705,7 @@ function Start-Fluent-Telegraf { # Run fluent-bit service first so that we do not miss any logs being forwarded by the telegraf service. # Run fluent-bit as a background job. 
Switch this to a windows service once fluent-bit supports natively running as a windows service Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\fluent-bit\bin\fluent-bit.exe" -ArgumentList @("-c", "C:/etc/fluent-bit/fluent-bit-geneva.conf", "-e", "C:\opt\amalogswindows\out_oms.so") } - } - else { + } else { $fluentbitConfFile = "C:/etc/fluent-bit/fluent-bit.conf" Write-Host "Using fluent-bit config: $($fluentbitConfFile)" # Run fluent-bit service first so that we do not miss any logs being forwarded by the telegraf service. @@ -816,8 +811,7 @@ function Start-Telegraf { C:\opt\telegraf\telegraf.exe --service start Get-Service telegraf } - } - else { + } else { Write-Host "Telegraf not started since Fluentbit tcp listener is not up and running on port 25229" } } @@ -867,24 +861,24 @@ function Bootstrap-CACertificates { } function IsGenevaMode() { - $isGenevaLogsIntegration = $false - $isGenevaLogsMultitenancy = $false + $isGenevaLogsIntegration=$false + $isGenevaLogsMultitenancy=$false $genevaLogsIntegration = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_INTEGRATION") $genevaLogsMultitenancy = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_MULTI_TENANCY") $genevaLogsInfraNameSpaces = [System.Environment]::GetEnvironmentVariable("GENEVA_LOGS_INFRA_NAMESPACES") - $isGenevaLogsInfraNameSpacesEmpty = $true + $isGenevaLogsInfraNameSpacesEmpty=$true if (![string]::IsNullOrEmpty($genevaLogsIntegration) -and $genevaLogsIntegration.ToLower() -eq 'true') { - $isGenevaLogsIntegration = $true + $isGenevaLogsIntegration=$true } if (![string]::IsNullOrEmpty($genevaLogsMultitenancy) -and $genevaLogsMultitenancy.ToLower() -eq 'true') { - $isGenevaLogsMultitenancy = $true + $isGenevaLogsMultitenancy=$true } if (![string]::IsNullOrEmpty($genevaLogsInfraNameSpaces)) { - $isGenevaLogsInfraNameSpacesEmpty = $false + $isGenevaLogsInfraNameSpacesEmpty=$false } - if ($isGenevaLogsIntegration -and (!$isGenevaLogsMultitenancy -or 
!$isGenevaLogsInfraNameSpacesEmpty)) { - return $true + if ($isGenevaLogsIntegration -and (!$isGenevaLogsMultitenancy -or !$isGenevaLogsInfraNameSpacesEmpty)){ + return $true } return $false } @@ -913,7 +907,7 @@ $isGenevaModeVar = IsGenevaMode if ($isGenevaModeVar) { Write-Host "Starting Windows AMA in 1P Mode" #start Windows AMA - Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\windowsazuremonitoragent\windowsazuremonitoragent\Monitoring\Agent\MonAgentLauncher.exe" -ArgumentList @("-useenv") } + Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\windowsazuremonitoragent\windowsazuremonitoragent\Monitoring\Agent\MonAgentLauncher.exe" -ArgumentList @("-useenv")} if (![string]::IsNullOrEmpty($isAADMSIAuth) -and $isAADMSIAuth.ToLower() -eq 'true') { Write-Host "skipping agent onboarding via cert since AAD MSI Auth configured" } @@ -928,7 +922,7 @@ else { Write-Host "skipping agent onboarding via cert since AAD MSI Auth configured" #start Windows AMA - Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\windowsazuremonitoragent\windowsazuremonitoragent\Monitoring\Agent\MonAgentLauncher.exe" -ArgumentList @("-useenv") } + Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\windowsazuremonitoragent\windowsazuremonitoragent\Monitoring\Agent\MonAgentLauncher.exe" -ArgumentList @("-useenv")} $version = Get-Content -Path "C:\opt\windowsazuremonitoragent\version.txt" Write-Host $version } From 28f765442a985009d3a15362d955159d48d26c23 Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Thu, 23 May 2024 00:30:05 +0000 Subject: [PATCH 12/15] update enablement conditions --- kubernetes/linux/main.sh | 13 ++++++++++--- source/plugins/ruby/in_kube_podinventory.rb | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index ca88a7cb2..5163d9132 100644 --- a/kubernetes/linux/main.sh +++ b/kubernetes/linux/main.sh @@ -1073,7 +1073,14 @@ if [ -e 
"/opt/dcr_env_var" ]; then setGlobalEnvVar LOGS_AND_EVENTS_ONLY "${LOGS_AND_EVENTS_ONLY}" fi -setGlobalEnvVar AZMON_RESOURCE_OPTIMIZATION_ENABLED "${AZMON_RESOURCE_OPTIMIZATION_ENABLED}" +setGlobalEnvVar ENABLE_CUSTOM_METRICS "${ENABLE_CUSTOM_METRICS}" +if [ "${ENABLE_CUSTOM_METRICS}" == "true" ]; then + setGlobalEnvVar AZMON_RESOURCE_OPTIMIZATION_ENABLED "false" + export AZMON_RESOURCE_OPTIMIZATION_ENABLED="false" +else + setGlobalEnvVar AZMON_RESOURCE_OPTIMIZATION_ENABLED "${AZMON_RESOURCE_OPTIMIZATION_ENABLED}" +fi + if [ "$AZMON_RESOURCE_OPTIMIZATION_ENABLED" != "true" ]; then # no dependency on fluentd for prometheus side car container if [ "${CONTAINER_TYPE}" != "PrometheusSidecar" ] && [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE}" != "true" ]; then @@ -1226,7 +1233,7 @@ if [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE}" != "true" ]; then sed -i -e "s/placeholder_hostname/$nodename/g" $telegrafConfFile fi -if [ "${AZMON_RESOURCE_OPTIMIZATION_ENABLED}" == "true" ] || [ "${ENABLE_CUSTOM_METRICS}" != "true" ]; then +if [ "${ENABLE_CUSTOM_METRICS}" != "true" ]; then sed -i '/^#CustomMetricsStart/,/^#CustomMetricsEnd/ s/^/# /' $telegrafConfFile fi @@ -1255,7 +1262,7 @@ if [ ! -e "/etc/config/kube.conf" ] && [ "${GENEVA_LOGS_INTEGRATION_SERVICE_MODE else echo "checking for listener on tcp #25226 and waiting for $WAITTIME_PORT_25226 secs if not.." waitforlisteneronTCPport 25226 $WAITTIME_PORT_25226 - if [ "${AZMON_RESOURCE_OPTIMIZATION_ENABLED}" != "true" ] || [ "${ENABLE_CUSTOM_METRICS}" == true ]; then + if [ "${ENABLE_CUSTOM_METRICS}" == true ]; then echo "checking for listener on tcp #25228 and waiting for $WAITTIME_PORT_25228 secs if not.." 
waitforlisteneronTCPport 25228 $WAITTIME_PORT_25228 fi diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index 4f5d18364..aebe53bd2 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -2,6 +2,7 @@ # frozen_string_literal: true require "fluent/plugin/input" +require "oms_common" module Fluent::Plugin class Kube_PodInventory_Input < Input From 5b26090e380b9c1e758ce79c070e0608ff378747 Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Thu, 23 May 2024 02:05:52 +0000 Subject: [PATCH 13/15] fix bug --- source/plugins/ruby/in_kube_podinventory.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb index aebe53bd2..9ba02327c 100644 --- a/source/plugins/ruby/in_kube_podinventory.rb +++ b/source/plugins/ruby/in_kube_podinventory.rb @@ -2,7 +2,7 @@ # frozen_string_literal: true require "fluent/plugin/input" -require "oms_common" +require_relative "oms_common" module Fluent::Plugin class Kube_PodInventory_Input < Input From adc85fd511029c180a0d51deffe5755d575e7d53 Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Thu, 23 May 2024 16:48:14 +0000 Subject: [PATCH 14/15] add kube-cm as well --- kubernetes/linux/main.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index 5163d9132..998df7273 100644 --- a/kubernetes/linux/main.sh +++ b/kubernetes/linux/main.sh @@ -1050,6 +1050,7 @@ if [ "${AZMON_WINDOWS_FLUENT_BIT_DISABLED}" == "true" ] || [ -z "${AZMON_WINDOWS if [ -e "/etc/config/kube.conf" ]; then # Replace a string in the configmap file sed -i "s/#@include windows_rs/@include windows_rs/g" /etc/fluent/kube.conf + sed -i "s/#@include windows_rs/@include windows_rs/g" /etc/fluent/kube-cm.conf fi fi From 35c6af64a8e34a174dfb38d08d41cfb5e4ee4591 Mon Sep 17 00:00:00 2001 From: Amol Agrawal Date: Thu, 23 May 2024 
21:26:04 +0000 Subject: [PATCH 15/15] fix config issues --- .../installer/scripts/fluent-bit-conf-customizer.rb | 6 +++++- build/common/installer/scripts/tomlparser-agent-config.rb | 8 +++++++- source/plugins/ruby/CustomMetricsUtils.rb | 5 +++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/build/common/installer/scripts/fluent-bit-conf-customizer.rb b/build/common/installer/scripts/fluent-bit-conf-customizer.rb index 0a591efc8..6b937f2e7 100644 --- a/build/common/installer/scripts/fluent-bit-conf-customizer.rb +++ b/build/common/installer/scripts/fluent-bit-conf-customizer.rb @@ -131,6 +131,7 @@ def substituteFluentBitPlaceHolders multilineLogging = ENV["AZMON_MULTILINE_ENABLED"] stacktraceLanguages = ENV["AZMON_MULTILINE_LANGUAGES"] resourceOptimizationEnabled = ENV["AZMON_RESOURCE_OPTIMIZATION_ENABLED"] + enableCustomMetrics = ENV["ENABLE_CUSTOM_METRICS"] windowsFluentBitDisabled = ENV["AZMON_WINDOWS_FLUENT_BIT_DISABLED"] kubernetesMetadataCollection = ENV["AZMON_KUBERNETES_METADATA_ENABLED"] annotationBasedLogFiltering = ENV["AZMON_ANNOTATION_BASED_LOG_FILTERING"] @@ -194,7 +195,10 @@ def substituteFluentBitPlaceHolders new_contents = substituteMultiline(multilineLogging, stacktraceLanguages, new_contents) - if !@isWindows || (@isWindows && (!windowsFluentBitDisabled.nil? && windowsFluentBitDisabled.to_s.downcase == "false")) + # Valid resource optimization scenarios + # if Linux and Custom Metrics not enabled + # or if Windows and Fluent Bit is not disabled + if (!@isWindows && (enableCustomMetrics.nil? || enableCustomMetrics.to_s.downcase == "false")) || (@isWindows && (!windowsFluentBitDisabled.nil? 
&& windowsFluentBitDisabled.to_s.downcase == "false")) new_contents = substituteResourceOptimization(resourceOptimizationEnabled, new_contents) end File.open(@fluent_bit_conf_path, "w") { |file| file.puts new_contents } diff --git a/build/common/installer/scripts/tomlparser-agent-config.rb b/build/common/installer/scripts/tomlparser-agent-config.rb index 3d74dc3c4..a0036634b 100644 --- a/build/common/installer/scripts/tomlparser-agent-config.rb +++ b/build/common/installer/scripts/tomlparser-agent-config.rb @@ -384,10 +384,16 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end + enable_custom_metrics = ENV["ENABLE_CUSTOM_METRICS"] + if !enable_custom_metrics.nil? && enable_custom_metrics.to_s.downcase == "true" + @resource_optimization_enabled = false + puts "Resource Optimization disabled since custom metrics is enabled" + end + windows_fluent_bit_config = parsedConfig[:agent_settings][:windows_fluent_bit] if !windows_fluent_bit_config.nil? windows_fluent_bit_disabled = windows_fluent_bit_config[:disabled] - if !windows_fluent_bit_disabled.nil? && windows_fluent_bit_disabled.downcase == "false" + if !windows_fluent_bit_disabled.nil? && windows_fluent_bit_disabled.to_s.downcase == "false" @windows_fluent_bit_disabled = false end puts "Using config map value: AZMON_WINDOWS_FLUENT_BIT_DISABLED = #{@windows_fluent_bit_disabled}" diff --git a/source/plugins/ruby/CustomMetricsUtils.rb b/source/plugins/ruby/CustomMetricsUtils.rb index 77675950c..eb5470056 100644 --- a/source/plugins/ruby/CustomMetricsUtils.rb +++ b/source/plugins/ruby/CustomMetricsUtils.rb @@ -10,6 +10,7 @@ def check_custom_metrics_availability aks_region = ENV['AKS_REGION'] aks_resource_id = ENV['AKS_RESOURCE_ID'] aks_cloud_environment = ENV['CLOUD_ENVIRONMENT'] + enable_custom_metrics = ENV['ENABLE_CUSTOM_METRICS'] if aks_region.to_s.empty? || aks_resource_id.to_s.empty? return false # This will also take care of AKS-Engine Scenario. AKS_REGION/AKS_RESOURCE_ID is not set for AKS-Engine. 
Only ACS_RESOURCE_NAME is set end @@ -19,6 +20,10 @@ def check_custom_metrics_availability return true end + if enable_custom_metrics.nil? || enable_custom_metrics.to_s.downcase == 'false' + return false + end + return aks_cloud_environment.to_s.downcase == 'azurepubliccloud' end end