From 5f4ca6581b3f57a9115c035aa313a0980600fa4f Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Mon, 21 Oct 2019 23:41:19 +0300 Subject: [PATCH] http: reuse requests.Session This way we properly utilize the session's automatic connection pooling instead of creating a new fd for each request, which very quickly exceeds the ulimit for max open fds on macOS and Windows. Kudos to @pared for investigating :tada: Fixes #2600 Signed-off-by: Ruslan Kuprieiev --- dvc/remote/http.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/dvc/remote/http.py b/dvc/remote/http.py index 07636f017b..aef214c081 100644 --- a/dvc/remote/http.py +++ b/dvc/remote/http.py @@ -1,9 +1,11 @@ from __future__ import unicode_literals import logging + +from funcy import cached_property + from dvc.scheme import Schemes from dvc.utils.compat import open - from dvc.progress import Tqdm from dvc.exceptions import DvcException from dvc.config import Config, ConfigError @@ -14,6 +16,8 @@ class RemoteHTTP(RemoteBASE): scheme = Schemes.HTTP + SESSION_RETRIES = 5 + SESSION_BACKOFF_FACTOR = 0.1 REQUEST_TIMEOUT = 10 CHUNK_SIZE = 2 ** 16 PARAM_CHECKSUM = "etag" @@ -76,6 +80,24 @@ def get_file_checksum(self, path_info): return etag + @cached_property + def _session(self): + import requests + from requests.adapters import HTTPAdapter + from urllib3.util.retry import Retry + + session = requests.Session() + + retries = Retry( + total=self.SESSION_RETRIES, + backoff_factor=self.SESSION_BACKOFF_FACTOR, + ) + + session.mount("http://", HTTPAdapter(max_retries=retries)) + session.mount("https://", HTTPAdapter(max_retries=retries)) + + return session + def _request(self, method, url, **kwargs): import requests @@ -83,7 +105,7 @@ def _request(self, method, url, **kwargs): kwargs.setdefault("timeout", self.REQUEST_TIMEOUT) try: - return requests.request(method, url, **kwargs) + return self._session.request(method, url, **kwargs) except requests.exceptions.RequestException: raise 
DvcException("could not perform a {} request".format(method))