diff --git a/README.md b/README.md index 0453f83..bd2d5c3 100644 --- a/README.md +++ b/README.md @@ -70,3 +70,5 @@ If the filename ends with `.url` suffix, the content will be processed as an URL | `DEFAULT_FILE_MODE` | The default file system permission for every file. Use three digits (e.g. '500', '440', ...) | false | - | string | `KUBECONFIG` | if this is given and points to a file or `~/.kube/config` is mounted k8s config will be loaded from this file, otherwise "incluster" k8s configuration is tried. | false | - | string |`ENABLE_5XX` | Set to `true` to enable pulling of 5XX response content from config map. Used in case if the filename ends with `.url` suffix (Please refer to the `*.url` feature here.) | false | - | boolean +| `WATCH_SERVER_TIMEOUT` | Polite request to the server, asking it to cleanly close watch connections after this many seconds ([#85](https://github.com/kiwigrid/k8s-sidecar/issues/85)) | false | `60` | integer +| `WATCH_CLIENT_TIMEOUT` | If you have a network outage dropping all packets with no RST/FIN, this is how many seconds your client waits on watches before realizing & dropping the connection. You can keep this number low. 
([#85](https://github.com/kiwigrid/k8s-sidecar/issues/85)) | false | `66` | integer diff --git a/sidecar/helpers.py b/sidecar/helpers.py index 66a2ece..fd5ad98 100755 --- a/sidecar/helpers.py +++ b/sidecar/helpers.py @@ -19,6 +19,22 @@ REQ_RETRY_BACKOFF_FACTOR = 1.1 if os.getenv("REQ_RETRY_BACKOFF_FACTOR") is None else float(os.getenv("REQ_RETRY_BACKOFF_FACTOR")) REQ_TIMEOUT = 10 if os.getenv("REQ_TIMEOUT") is None else float(os.getenv("REQ_TIMEOUT")) +# Tune default timeouts as outlined in +# https://github.com/kubernetes-client/python/issues/1148#issuecomment-626184613 +# https://github.com/kubernetes-client/python/blob/master/examples/watch/timeout-settings.md +# I picked 60 and 66 due to https://github.com/nolar/kopf/issues/847#issuecomment-971651446 + +# 60 is a polite request to the server, asking it to cleanly close the connection after that. +# If you have a network outage, this does nothing. +# You can set this number much higher, maybe to 3600 seconds (1h). +WATCH_SERVER_TIMEOUT = int(os.environ.get("WATCH_SERVER_TIMEOUT", 60))  # env values arrive as str; coerce so the type is always int + +# 66 is a client-side timeout, configuring your local socket. +# If you have a network outage dropping all packets with no RST/FIN, +# this is how long your client waits before realizing & dropping the connection. +# You can keep this number low, maybe 60 seconds. +WATCH_CLIENT_TIMEOUT = int(os.environ.get("WATCH_CLIENT_TIMEOUT", 66))  # must be numeric, not str, to be usable as a request timeout + def write_data_to_file(folder, filename, data, data_type=CONTENT_TYPE_TEXT): """ Write text to a file. If the parent folder doesn't exist, create it. 
If there are insufficient diff --git a/sidecar/resources.py b/sidecar/resources.py index 8a9f06f..62034c5 100755 --- a/sidecar/resources.py +++ b/sidecar/resources.py @@ -15,7 +15,7 @@ from urllib3.exceptions import ProtocolError from helpers import request, write_data_to_file, remove_file, timestamp, unique_filename, CONTENT_TYPE_TEXT, \ - CONTENT_TYPE_BASE64_BINARY, execute + CONTENT_TYPE_BASE64_BINARY, execute, WATCH_SERVER_TIMEOUT, WATCH_CLIENT_TIMEOUT RESOURCE_SECRET = "secret" RESOURCE_CONFIGMAP = "configmap" @@ -191,7 +191,9 @@ def _watch_resource_iterator(label, label_value, target_folder, request_url, req label_selector = f"{label}={label_value}" if label_value else label additional_args = { - 'label_selector': label_selector + 'label_selector': label_selector, + 'timeout_seconds': WATCH_SERVER_TIMEOUT, + '_request_timeout': WATCH_CLIENT_TIMEOUT, } if namespace != "ALL": additional_args['namespace'] = namespace