diff --git a/.github/workflows/ci_build_test.yaml b/.github/workflows/ci_build_test.yaml
index 44c1b3e..660c7c2 100644
--- a/.github/workflows/ci_build_test.yaml
+++ b/.github/workflows/ci_build_test.yaml
@@ -66,8 +66,9 @@ jobs:
       CI_INDEX_EVENTS: ci_events
       CI_INDEX_OBJECTS: ci_objects
       CI_INDEX_METRICS: ci_metrics
-      KUBERNETES_VERSION: v1.15.2
-      MINIKUBE_VERSION: v1.21.0
+      KUBERNETES_VERSION: v1.23.2
+      MINIKUBE_VERSION: v1.24.0
+      MINIKUBE_NODE_COUNTS: 2
       GITHUB_ACTIONS: true
 
     steps:
@@ -121,7 +122,7 @@ jobs:
           chmod +x minikube
           sudo mv minikube /usr/local/bin/
           # Start Minikube and Wait
-          minikube start --driver=docker --container-runtime=docker --cpus 2 --memory 4096 --kubernetes-version=${KUBERNETES_VERSION} --no-vtx-check
+          minikube start --driver=docker --container-runtime=docker --cpus 2 --memory 4096 --kubernetes-version=${KUBERNETES_VERSION} --no-vtx-check -n=${MINIKUBE_NODE_COUNTS}
           export JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'
           until kubectl get nodes -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do
             sleep 1;
@@ -130,13 +131,15 @@ jobs:
       - name: Install Splunk
         run: |
           # Wait until minikube is ready
-          kubectl apply -f https://docs.projectcalico.org/v3.14/manifests/calico.yaml
           export JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'
           until kubectl get nodes -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do
             echo "wait for minikube ready ..."
             sleep 1;
           done
           kubectl get nodes
+          until kubectl get sa | grep -q 'default'; do
+            sleep 1;
+          done
           # Install Splunk on minikube
           kubectl apply -f ci_scripts/k8s-splunk.yml
           # Wait until splunk is ready
@@ -167,7 +170,7 @@ jobs:
         run: |
           cd /opt/splunk-connect-for-kubernetes
           kubectl apply -f test/test_setup.yaml
-          sleep 60
+          sleep 120
 
       - uses: actions/setup-python@v2
         with:
@@ -189,4 +192,5 @@ jobs:
             --splunkd-url https://$CI_SPLUNK_HOST:8089 \
             --splunk-user admin \
             --splunk-password $CI_SPLUNK_PASSWORD \
-            -p no:warnings -s
+            --nodes-count $MINIKUBE_NODE_COUNTS \
+            -p no:warnings -s -n auto
diff --git a/ci_scripts/deploy_connector.sh b/ci_scripts/deploy_connector.sh
index b2d5a55..6220f0e 100755
--- a/ci_scripts/deploy_connector.sh
+++ b/ci_scripts/deploy_connector.sh
@@ -23,7 +23,11 @@ helm install ci-sck --set global.splunk.hec.token=$CI_SPLUNK_HEC_TOKEN \
 --set splunk-kubernetes-metrics.image.tag=recent \
 --set splunk-kubernetes-metrics.image.pullPolicy=IfNotPresent \
 -f ci_scripts/sck_values.yml helm-chart/splunk-connect-for-kubernetes
-#wait for deployment to finish
-until kubectl get pod | grep Running | [[ $(wc -l) == 4 ]]; do
+
+kubectl get pod
+# wait for deployment to finish
+# metric and logging daemon set for each node + aggr + object + splunk
+PODS=$((MINIKUBE_NODE_COUNTS*2+2+1))
+until kubectl get pod | grep Running | [[ $(wc -l) == $PODS ]]; do
   sleep 1;
 done
diff --git a/lib/fluent/plugin/in_kubernetes_metrics.rb b/lib/fluent/plugin/in_kubernetes_metrics.rb
index 261192c..d50073e 100644
--- a/lib/fluent/plugin/in_kubernetes_metrics.rb
+++ b/lib/fluent/plugin/in_kubernetes_metrics.rb
@@ -576,8 +576,48 @@ def emit_pod_metrics(node_name, pod)
 
         unless pod['startTime'].nil?
           emit_uptime tag: tag, start_time: pod['startTime'], labels: labels
-          emit_cpu_metrics tag: tag, metrics: pod['cpu'], labels: labels if pod['cpu'] unless pod['cpu'].nil?
-          emit_memory_metrics tag: tag, metrics: pod['memory'], labels: labels if pod['memory'] unless pod['memory'].nil?
+          # When the pod-level cpu entry is missing, rebuild it by summing the
+          # per-container stats, which the Summary API nests under container['cpu'].
+          if pod['cpu'].nil?
+            if pod['containers'].nil? or Array(pod['containers']).empty?
+              log.warn "Summary API response has no pod cpu metrics information"
+            else
+              usageNanoCores = 0
+              usageCoreNanoSeconds = 0
+              time = nil
+              Array(pod['containers']).each do |container|
+                cpu = container['cpu']
+                next if cpu.nil?
+                time = cpu['time'] unless cpu['time'].nil?
+                usageNanoCores += cpu['usageNanoCores'].to_i
+                usageCoreNanoSeconds += cpu['usageCoreNanoSeconds'].to_i
+              end
+              pod['cpu'] = { 'time' => time, 'usageNanoCores' => usageNanoCores, 'usageCoreNanoSeconds' => usageCoreNanoSeconds }
+            end
+          end
+          emit_cpu_metrics tag: tag, metrics: pod['cpu'], labels: labels unless pod['cpu'].nil?
+          # Same fallback for memory, reading each container's container['memory'].
+          if pod['memory'].nil?
+            if pod['containers'].nil? or Array(pod['containers']).empty?
+              log.warn "Summary API response has no pod memory metrics information"
+            else
+              # Accumulators live outside the block so the totals survive the loop.
+              memory_time = nil
+              memory_metrics = {}
+              Array(pod['containers']).each do |container|
+                memory = container['memory']
+                next if memory.nil?
+                memory_time = memory['time'] unless memory['time'].nil?
+                %w[availableBytes usageBytes workingSetBytes rssBytes pageFaults majorPageFaults].each do |name|
+                  value = memory[name]
+                  memory_metrics[name] = memory_metrics.fetch(name, 0) + value unless value.nil?
+                end
+              end
+              memory_metrics['time'] = memory_time
+              pod['memory'] = memory_metrics
+            end
+          end
+          emit_memory_metrics tag: tag, metrics: pod['memory'], labels: labels unless pod['memory'].nil?
           emit_network_metrics tag: tag, metrics: pod['network'], labels: labels unless pod['network'].nil?
           emit_fs_metrics tag: "#{tag}.ephemeral-storage", metrics: pod['ephemeral-storage'], labels: labels unless pod['ephemeral-storage'].nil?
           unless pod['volume'].nil?
@@ -595,6 +635,7 @@ def emit_pod_metrics(node_name, pod)
 
       def emit_metrics(metrics)
         emit_node_metrics(metrics['node']) unless metrics['node'].nil?
+        log.warn "Summary API received empty pods info" if (metrics['pods'].nil? or metrics['pods'].empty?)
         Array(metrics['pods']).each &method(:emit_pod_metrics).curry.call(metrics['node']['nodeName']) unless metrics['pods'].nil?
       end
 