From eb641b02bcb1c2a5a5f1b3e8ff26f93bbc46c617 Mon Sep 17 00:00:00 2001 From: Alex Robinson Date: Tue, 26 Jan 2016 23:57:02 +0000 Subject: [PATCH 1/3] Increase the fluentd buffer chunk size to improve write throughput. Also reduce the max wait between retries; 30 seconds should be more than enough backoff. --- .../fluentd-es-image/td-agent.conf | 10 +++++----- .../fluentd-gcp-image/google-fluentd.conf | 18 +++++++++--------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf index 4a155ac42968..ddfe37c979fc 100644 --- a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf +++ b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf @@ -186,12 +186,12 @@ port 9200 logstash_format true # Set the chunk limit the same as for fluentd-gcp. - buffer_chunk_limit 512K - # Cap buffer memory usage to 512KB/chunk * 128 chunks = 65 MB - buffer_queue_limit 128 + buffer_chunk_limit 2M + # Cap buffer memory usage to 2MiB/chunk * 32 chunks = 64 MiB + buffer_queue_limit 32 flush_interval 5s - # Never wait longer than 5 minutes between retries. - max_retry_wait 300 + # Never wait longer than 30 seconds between retries. + max_retry_wait 30 # Disable the limit on the number of retries (retry forever). disable_retry_limit diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf index 25a49850f012..9b0b7ff49334 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf @@ -130,15 +130,15 @@ type google_cloud # Set the chunk limit conservatively to avoid exceeding the GCL limit - # of 2MB per write request. - buffer_chunk_limit 512K + # of 10MiB per write request.
+ buffer_chunk_limit 2M # Cap the combined memory usage of this buffer and the one below to - # 512KB/chunk * (96 + 32) chunks = 65 MB - buffer_queue_limit 96 + # 2MiB/chunk * (24 + 8) chunks = 64 MiB + buffer_queue_limit 24 # Never wait more than 5 seconds before flushing logs in the non-error case. flush_interval 5s - # Never wait longer than 5 minutes between retries. - max_retry_wait 300 + # Never wait longer than 30 seconds between retries. + max_retry_wait 30 # Disable the limit on the number of retries (retry forever). disable_retry_limit @@ -148,9 +148,9 @@ type google_cloud detect_subservice false - buffer_chunk_limit 512K - buffer_queue_limit 32 + buffer_chunk_limit 2M + buffer_queue_limit 8 flush_interval 5s - max_retry_wait 300 + max_retry_wait 30 disable_retry_limit From e60ff3e2a3973f9132958a18eae55e3ccf0eab70 Mon Sep 17 00:00:00 2001 From: Alex Robinson Date: Tue, 26 Jan 2016 23:59:27 +0000 Subject: [PATCH 2/3] Don't let fluentd pipe its own logs directly back into itself. --- .../fluentd-elasticsearch/fluentd-es-image/td-agent.conf | 5 +++++ .../addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf index ddfe37c979fc..bbe398541816 100644 --- a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf +++ b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf @@ -100,6 +100,11 @@ # problem yet to be solved as secrets are not usable in static pods which the fluentd # pod must be until a per-node controller is available in Kubernetes. +# Do not directly collect fluentd's own logs to avoid infinite loops. 
+<match fluent.**> + type null +</match> + <source> type tail path /var/log/containers/*.log diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf index 9b0b7ff49334..98caf02fb60c 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf @@ -42,6 +42,11 @@ # the name of the Kubernetes container regardless of how many times the # Kubernetes pod has been restarted (resulting in a several Docker container IDs). +# Do not directly collect fluentd's own logs to avoid infinite loops. +<match fluent.**> + type null +</match> + <source> type tail format json From 52834729c0a87779d6d1468a6e9bbbd6842233f3 Mon Sep 17 00:00:00 2001 From: Alex Robinson Date: Wed, 27 Jan 2016 00:07:46 +0000 Subject: [PATCH 3/3] Update the fluentd versions to include fixes for #19405. --- cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile | 2 +- cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile | 2 +- cluster/saltbase/salt/fluentd-es/fluentd-es.yaml | 2 +- cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml | 2 +- docs/getting-started-guides/logging.md | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile index bf80e2aa8e8d..c045cb5b8246 100644 --- a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile +++ b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile @@ -1,7 +1,7 @@ .PHONY: build push IMAGE = fluentd-elasticsearch -TAG = 1.11 +TAG = 1.13 build: docker build -t gcr.io/google_containers/$(IMAGE):$(TAG) .
diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile index ff9a86c4c962..e7a506f3aa62 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile @@ -14,7 +14,7 @@ .PHONY: kbuild kpush -TAG = 1.14 +TAG = 1.15 # Rules for building the test image for deployment to Dockerhub with user kubernetes. diff --git a/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml b/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml index c2105b29476a..939471d24c0a 100644 --- a/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml +++ b/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml @@ -6,7 +6,7 @@ metadata: spec: containers: - name: fluentd-elasticsearch - image: gcr.io/google_containers/fluentd-elasticsearch:1.11 + image: gcr.io/google_containers/fluentd-elasticsearch:1.13 resources: limits: cpu: 100m diff --git a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml index dcc3322f24fe..c1da5ea4aa13 100644 --- a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml +++ b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml @@ -6,7 +6,7 @@ metadata: spec: containers: - name: fluentd-cloud-logging - image: gcr.io/google_containers/fluentd-gcp:1.14 + image: gcr.io/google_containers/fluentd-gcp:1.15 resources: limits: cpu: 100m diff --git a/docs/getting-started-guides/logging.md b/docs/getting-started-guides/logging.md index c6b2b69f7134..258cef104a23 100644 --- a/docs/getting-started-guides/logging.md +++ b/docs/getting-started-guides/logging.md @@ -141,7 +141,7 @@ metadata: spec: containers: - name: fluentd-cloud-logging - image: gcr.io/google_containers/fluentd-gcp:1.14 + image: gcr.io/google_containers/fluentd-gcp:1.15 resources: limits: cpu: 100m