From fc2ab3f616babbd8e3b93ae15b84737a462b33b8 Mon Sep 17 00:00:00 2001 From: harshit-splunk Date: Wed, 9 Feb 2022 14:38:50 +0530 Subject: [PATCH 1/4] updated emit_pod_metrics --- lib/fluent/plugin/in_kubernetes_metrics.rb | 40 ++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/lib/fluent/plugin/in_kubernetes_metrics.rb b/lib/fluent/plugin/in_kubernetes_metrics.rb index 261192c..7005a34 100644 --- a/lib/fluent/plugin/in_kubernetes_metrics.rb +++ b/lib/fluent/plugin/in_kubernetes_metrics.rb @@ -576,8 +576,43 @@ def emit_pod_metrics(node_name, pod) unless pod['startTime'].nil? emit_uptime tag: tag, start_time: pod['startTime'], labels: labels - emit_cpu_metrics tag: tag, metrics: pod['cpu'], labels: labels if pod['cpu'] unless pod['cpu'].nil? - emit_memory_metrics tag: tag, metrics: pod['memory'], labels: labels if pod['memory'] unless pod['memory'].nil? + if pod['cpu'].nil? + if pod['containers'].nil? or Array(pod['containers']).empty? + log.warn "Summary API response has no pod cpu metrics information" + else + usageNanoCores = 0 + usageCoreNanoSeconds = 0 + time = nil + Array(pod['containers']).each do |container| + time = container['time'] unless container['time'].nil? + usageNanoCores += container['usageNanoCores'] + usageCoreNanoSeconds += container['usageCoreNanoSeconds'] + end + pod['cpu'] = { 'time' => time, 'usageNanoCores' => usageNanoCores, 'usageCoreNanoSeconds' => usageCoreNanoSeconds } + end + end + emit_cpu_metrics tag: tag, metrics: pod['cpu'], labels: labels unless pod['cpu'].nil? + end + if pod['memory'].nil? + if pod['containers'].nil? or Array(pod['containers']).empty? + log.warn "Summary API response has no pod memory metrics information" + else + Array(pod['containers']).each do |container| + time = nil + memory_metrics = {} + %w[availableBytes usageBytes workingSetBytes rssBytes pageFaults majorPageFaults].each do |name| + time = container['time'] unless container['time'].nil? 
+ if value = metrics[name] + memory_metrics[name] = 0 if memory_metrics[name].nil? + memory_metrics[name] += value + end + end + end + memory_metrics['time'] = time + pod['memory'] = memory_metrics + end + end + emit_memory_metrics tag: tag, metrics: pod['memory'], labels: labels unless pod['memory'].nil? emit_network_metrics tag: tag, metrics: pod['network'], labels: labels unless pod['network'].nil? emit_fs_metrics tag: "#{tag}.ephemeral-storage", metrics: pod['ephemeral-storage'], labels: labels unless pod['ephemeral-storage'].nil? unless pod['volume'].nil? @@ -595,6 +630,7 @@ def emit_pod_metrics(node_name, pod) def emit_metrics(metrics) emit_node_metrics(metrics['node']) unless metrics['node'].nil? + log.warn "Summary API received empty pods info" if (metrics['pods'].nil? or metrics['pods'].empty?) Array(metrics['pods']).each &method(:emit_pod_metrics).curry.call(metrics['node']['nodeName']) unless metrics['pods'].nil? end From 46383abd74133ccd6ffce49eecffd9f069467d9c Mon Sep 17 00:00:00 2001 From: harshit-splunk Date: Wed, 9 Feb 2022 14:56:14 +0530 Subject: [PATCH 2/4] fixed syntax error --- lib/fluent/plugin/in_kubernetes_metrics.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/fluent/plugin/in_kubernetes_metrics.rb b/lib/fluent/plugin/in_kubernetes_metrics.rb index 7005a34..d50073e 100644 --- a/lib/fluent/plugin/in_kubernetes_metrics.rb +++ b/lib/fluent/plugin/in_kubernetes_metrics.rb @@ -592,7 +592,6 @@ def emit_pod_metrics(node_name, pod) end end emit_cpu_metrics tag: tag, metrics: pod['cpu'], labels: labels unless pod['cpu'].nil? - end if pod['memory'].nil? if pod['containers'].nil? or Array(pod['containers']).empty? 
log.warn "Summary API response has no pod memory metrics information" From 6da6061730dbf12964561f04c945d9147ceb4068 Mon Sep 17 00:00:00 2001 From: harshit-splunk Date: Wed, 9 Feb 2022 17:40:36 +0530 Subject: [PATCH 3/4] increased sleep time in CI workflow --- .github/workflows/ci_build_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_build_test.yaml b/.github/workflows/ci_build_test.yaml index 44c1b3e..702bdb8 100644 --- a/.github/workflows/ci_build_test.yaml +++ b/.github/workflows/ci_build_test.yaml @@ -167,7 +167,7 @@ jobs: run: | cd /opt/splunk-connect-for-kubernetes kubectl apply -f test/test_setup.yaml - sleep 60 + sleep 120 - uses: actions/setup-python@v2 with: From 59f3c70ff8383a29393a2e19cd7f4d822ac23d76 Mon Sep 17 00:00:00 2001 From: harshit-splunk Date: Thu, 10 Feb 2022 13:02:20 +0530 Subject: [PATCH 4/4] updated CI workflow --- .github/workflows/ci_build_test.yaml | 14 +++++++++----- ci_scripts/deploy_connector.sh | 8 ++++++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci_build_test.yaml b/.github/workflows/ci_build_test.yaml index 702bdb8..660c7c2 100644 --- a/.github/workflows/ci_build_test.yaml +++ b/.github/workflows/ci_build_test.yaml @@ -66,8 +66,9 @@ jobs: CI_INDEX_EVENTS: ci_events CI_INDEX_OBJECTS: ci_objects CI_INDEX_METRICS: ci_metrics - KUBERNETES_VERSION: v1.15.2 - MINIKUBE_VERSION: v1.21.0 + KUBERNETES_VERSION: v1.23.2 + MINIKUBE_VERSION: v1.24.0 + MINIKUBE_NODE_COUNTS: 2 GITHUB_ACTIONS: true steps: @@ -121,7 +122,7 @@ jobs: chmod +x minikube sudo mv minikube /usr/local/bin/ # Start Minikube and Wait - minikube start --driver=docker --container-runtime=docker --cpus 2 --memory 4096 --kubernetes-version=${KUBERNETES_VERSION} --no-vtx-check + minikube start --driver=docker --container-runtime=docker --cpus 2 --memory 4096 --kubernetes-version=${KUBERNETES_VERSION} --no-vtx-check -n=${MINIKUBE_NODE_COUNTS} export JSONPATH='{range 
.items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}' until kubectl get nodes -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1; @@ -130,13 +131,15 @@ jobs: - name: Install Splunk run: | # Wait until minikube is ready - kubectl apply -f https://docs.projectcalico.org/v3.14/manifests/calico.yaml export JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}' until kubectl get nodes -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do echo "wait for minikube ready ..." sleep 1; done kubectl get nodes + until kubectl get sa | grep -q 'default'; do + sleep 1; + done # Install Splunk on minikube kubectl apply -f ci_scripts/k8s-splunk.yml # Wait until splunk is ready @@ -189,4 +192,5 @@ jobs: --splunkd-url https://$CI_SPLUNK_HOST:8089 \ --splunk-user admin \ --splunk-password $CI_SPLUNK_PASSWORD \ - -p no:warnings -s + --nodes-count $MINIKUBE_NODE_COUNTS\ + -p no:warnings -s -n auto diff --git a/ci_scripts/deploy_connector.sh b/ci_scripts/deploy_connector.sh index b2d5a55..6220f0e 100755 --- a/ci_scripts/deploy_connector.sh +++ b/ci_scripts/deploy_connector.sh @@ -23,7 +23,11 @@ helm install ci-sck --set global.splunk.hec.token=$CI_SPLUNK_HEC_TOKEN \ --set splunk-kubernetes-metrics.image.tag=recent \ --set splunk-kubernetes-metrics.image.pullPolicy=IfNotPresent \ -f ci_scripts/sck_values.yml helm-chart/splunk-connect-for-kubernetes -#wait for deployment to finish -until kubectl get pod | grep Running | [[ $(wc -l) == 4 ]]; do + +kubectl get pod +# wait for deployment to finish +# metric and logging daemon set for each node + aggr + object + splunk +PODS=$((MINIKUBE_NODE_COUNTS*2+2+1)) +until kubectl get pod | grep Running | [[ $(wc -l) == $PODS ]]; do sleep 1; done