From ef1f038c322cc9a11cea1a631979e14faa65b57b Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 20 Nov 2019 13:29:43 -0600 Subject: [PATCH 01/46] analytics: refactor into a module --- dvc/analytics.py | 256 ----------------------------------- dvc/analytics/__init__.py | 64 +++++++++ dvc/analytics/__main__.py | 8 ++ dvc/analytics/system_info.py | 29 ++++ dvc/analytics/user_id.py | 55 ++++++++ dvc/command/daemon.py | 20 --- dvc/config.py | 11 +- dvc/lock.py | 6 +- dvc/main.py | 5 +- dvc/repo/init.py | 4 +- tests/func/test_analytics.py | 65 --------- tests/unit/test_analytics.py | 4 +- 12 files changed, 171 insertions(+), 356 deletions(-) delete mode 100644 dvc/analytics.py create mode 100644 dvc/analytics/__init__.py create mode 100644 dvc/analytics/__main__.py create mode 100644 dvc/analytics/system_info.py create mode 100644 dvc/analytics/user_id.py delete mode 100644 tests/func/test_analytics.py diff --git a/dvc/analytics.py b/dvc/analytics.py deleted file mode 100644 index 8b39fc31b8..0000000000 --- a/dvc/analytics.py +++ /dev/null @@ -1,256 +0,0 @@ -"""Collect and send usage analytics""" -from __future__ import unicode_literals - -import errno -import json -import logging -import os - -from dvc import __version__ -from dvc.utils import env2bool -from dvc.utils.compat import str - - -logger = logging.getLogger(__name__) - - -class Analytics(object): - """Class for collecting and sending usage analytics. - - Args: - info (dict): optional existing analytics report. - """ - - URL = "https://analytics.dvc.org" - TIMEOUT_POST = 5 - - USER_ID_FILE = "user_id" - - PARAM_DVC_VERSION = "dvc_version" - PARAM_USER_ID = "user_id" - PARAM_SYSTEM_INFO = "system_info" - - PARAM_OS = "os" - - PARAM_WINDOWS_VERSION_MAJOR = "windows_version_major" - PARAM_WINDOWS_VERSION_MINOR = "windows_version_minor" - PARAM_WINDOWS_VERSION_BUILD = "windows_version_build" - PARAM_WINDOWS_VERSION_SERVICE_PACK = "windows_version_service_pack" - - PARAM_MAC_VERSION = "mac_version" - - PARAM_LINUX_DISTRO = "linux_distro" - PARAM_LINUX_DISTRO_VERSION = "linux_distro_version" - PARAM_LINUX_DISTRO_LIKE = "linux_distro_like" - - PARAM_SCM_CLASS = "scm_class" - PARAM_IS_BINARY = "is_binary" - PARAM_CMD_CLASS = "cmd_class" - PARAM_CMD_RETURN_CODE = "cmd_return_code" - - def __init__(self, info=None): - from dvc.config import Config - from dvc.lock import Lock - - if info is None: - info = {} - - self.info = info - - cdir = Config.get_global_config_dir() - try: - os.makedirs(cdir) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise - - self.user_id_file = os.path.join(cdir, self.USER_ID_FILE) - self.user_id_file_lock = Lock(self.user_id_file + ".lock") - - @staticmethod - def load(path): - """Loads analytics report from json file specified by path. - - Args: - path (str): path to json file with analytics report. - """ - with open(path, "r") as fobj: - analytics = Analytics(info=json.load(fobj)) - os.unlink(path) - return analytics - - def _write_user_id(self): - import uuid - - with open(self.user_id_file, "w+") as fobj: - user_id = str(uuid.uuid4()) - info = {self.PARAM_USER_ID: user_id} - json.dump(info, fobj) - return user_id - - def _read_user_id(self): - if not os.path.exists(self.user_id_file): - return None - - with open(self.user_id_file, "r") as fobj: - try: - info = json.load(fobj) - except ValueError as exc: - logger.debug("Failed to load user_id: {}".format(exc)) - return None - - return info[self.PARAM_USER_ID] - - def _get_user_id(self): - from dvc.lock import LockError - - try: - with self.user_id_file_lock: - user_id = self._read_user_id() - if user_id is None: - user_id = self._write_user_id() - return user_id - except LockError: - msg = "Failed to acquire '{}'" - logger.debug(msg.format(self.user_id_file_lock.lockfile)) - - def _collect_windows(self): - import sys - - version = sys.getwindowsversion() # pylint: disable=no-member - info = {} - info[self.PARAM_OS] = "windows" - info[self.PARAM_WINDOWS_VERSION_MAJOR] = version.major - info[self.PARAM_WINDOWS_VERSION_MINOR] = version.minor - info[self.PARAM_WINDOWS_VERSION_BUILD] = version.build - info[self.PARAM_WINDOWS_VERSION_SERVICE_PACK] = version.service_pack - return info - - def _collect_darwin(self): - import platform - - info = {} - info[self.PARAM_OS] = "mac" - info[self.PARAM_MAC_VERSION] = platform.mac_ver()[0] - return info - - def _collect_linux(self): - import distro - - info = {} - info[self.PARAM_OS] = "linux" - info[self.PARAM_LINUX_DISTRO] = distro.id() - info[self.PARAM_LINUX_DISTRO_VERSION] = distro.version() - info[self.PARAM_LINUX_DISTRO_LIKE] = distro.like() - return info - - def _collect_system_info(self): - import platform - - system = platform.system() - - if system == "Windows": - return self._collect_windows() - - if system == "Darwin": - return self._collect_darwin() - - if system == "Linux": - return self._collect_linux() - - raise NotImplementedError - - def collect(self): - """Collect analytics report.""" - from dvc.scm import SCM - from dvc.utils import is_binary - from dvc.repo import Repo - from dvc.exceptions import NotDvcRepoError - - self.info[self.PARAM_DVC_VERSION] = __version__ - self.info[self.PARAM_IS_BINARY] = is_binary() - self.info[self.PARAM_USER_ID] = self._get_user_id() - - self.info[self.PARAM_SYSTEM_INFO] = self._collect_system_info() - - try: - scm = SCM(root_dir=Repo.find_root()) - self.info[self.PARAM_SCM_CLASS] = type(scm).__name__ - except NotDvcRepoError: - pass - - def collect_cmd(self, args, ret): - """Collect analytics info from a CLI command.""" - from dvc.command.daemon import CmdDaemonAnalytics - - assert isinstance(ret, int) or ret is None - - if ret is not None: - self.info[self.PARAM_CMD_RETURN_CODE] = ret - - if args is not None and hasattr(args, "func"): - assert args.func != CmdDaemonAnalytics - self.info[self.PARAM_CMD_CLASS] = args.func.__name__ - - def dump(self): - """Save analytics report to a temporary file. - - Returns: - str: path to the temporary file that contains the analytics report. - """ - import tempfile - - with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: - json.dump(self.info, fobj) - return fobj.name - - @staticmethod - def is_enabled(cmd=None): - from dvc.config import Config, to_bool - from dvc.command.daemon import CmdDaemonBase - - if env2bool("DVC_TEST"): - return False - - if isinstance(cmd, CmdDaemonBase): - return False - - core = Config(validate=False).config.get(Config.SECTION_CORE, {}) - enabled = to_bool(core.get(Config.SECTION_CORE_ANALYTICS, "true")) - logger.debug( - "Analytics is {}.".format("enabled" if enabled else "disabled") - ) - return enabled - - @staticmethod - def send_cmd(cmd, args, ret): - """Collect and send analytics for CLI command. - - Args: - args (list): parsed args for the CLI command. - ret (int): return value of the CLI command. - """ - from dvc.daemon import daemon - - if not Analytics.is_enabled(cmd): - return - - analytics = Analytics() - analytics.collect_cmd(args, ret) - daemon(["analytics", analytics.dump()]) - - def send(self): - """Collect and send analytics.""" - import requests - - if not self.is_enabled(): - return - - self.collect() - - logger.debug("Sending analytics: {}".format(self.info)) - - try: - requests.post(self.URL, json=self.info, timeout=self.TIMEOUT_POST) - except requests.exceptions.RequestException as exc: - logger.debug("Failed to send analytics: {}".format(str(exc))) diff --git a/dvc/analytics/__init__.py b/dvc/analytics/__init__.py new file mode 100644 index 0000000000..739ef253ea --- /dev/null +++ b/dvc/analytics/__init__.py @@ -0,0 +1,64 @@ +import json +import logging +import requests +import subprocess +import tempfile + +from dvc import __version__ +from dvc.analytics import user_id, system_info +from dvc.config import Config, to_bool +from dvc.exceptions import NotDvcRepoError +from dvc.repo import Repo +from dvc.scm import SCM +from dvc.utils import env2bool, is_binary + + +logger = logging.getLogger(__name__) + + +def collect_and_send_report(arguments=None, exit_code=None): + report = { + "cmd_class": arguments.func.__name__, + "cmd_return_code": exit_code, + "dvc_version": __version__, + "is_binary": is_binary(), + "scm_class": scm_in_use(), + "system_info": system_info.collect(), + "user_id": user_id.find_or_create(), + } + + with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: + json.dump(report, fobj) + subprocess.Popen(["python", "-m", "dvc.analytics", fobj.name]) + + +def send(path): + url = "https://analytics.dvc.org" + + with open(path) as fobj: + report = json.load(fobj) + + requests.post(url, json=report, timeout=5) + + +def is_enabled(): + if env2bool("DVC_TEST"): + return False + + enabled = to_bool( + Config(validate=False) + .config.get(Config.SECTION_CORE, {}) + .get(Config.SECTION_CORE_ANALYTICS, "true") + ) + + logger.debug("Analytics is {}enabled.".format("" if enabled else "dis")) + + return enabled + + +def scm_in_use(): + try: + scm = SCM(root_dir=Repo.find_root()) + return type(scm).__name__ + except NotDvcRepoError: + pass diff --git a/dvc/analytics/__main__.py b/dvc/analytics/__main__.py new file mode 100644 index 0000000000..13ebfa53b8 --- /dev/null +++ b/dvc/analytics/__main__.py @@ -0,0 +1,8 @@ +import sys + +from dvc.analytics import send + + +if __name__ == "__main__": + report_fname = sys.argv[1] + send(report_fname) diff --git a/dvc/analytics/system_info.py b/dvc/analytics/system_info.py new file mode 100644 index 0000000000..666324cfd3 --- /dev/null +++ b/dvc/analytics/system_info.py @@ -0,0 +1,29 @@ +import platform +import sys +import distro + + +def collect(): + system = platform.system() + + if system == "Windows": + version = sys.getwindowsversion() + + return { + "os": "windows", + "windows_version_build": version.build, + "windows_version_major": version.major, + "windows_version_minor": version.minor, + "windows_version_service_pack": version.service_pack, + } + + if system == "Darwing": + return {"os": "mac", "mac_version": platform.mac_ver()[0]} + + if system == "Linux": + return { + "os": "linux", + "linux_distro": distro.id(), + "linux_distro_like": distro.like(), + "linux_distro_version": distro.version(), + } diff --git a/dvc/analytics/user_id.py b/dvc/analytics/user_id.py new file mode 100644 index 0000000000..4c360cd980 --- /dev/null +++ b/dvc/analytics/user_id.py @@ -0,0 +1,55 @@ +""" +Interact with the user's ID stored under the global config directory. + +The file should contain a JSON with a "user_id" key: + + {"user_id": "16fd2706-8baf-433b-82eb-8c7fada847da"} + +IDs are generated randomly with UUID. +""" + +import json +import logging +import uuid + +from dvc.config import Config +from dvc.lock import Lock, LockError +from dvc.utils import makedirs + + +logger = logging.getLogger(__name__) + +config_dir = Config.get_global_config_dir() +fname = config_dir / "user_id" + + +def find_or_create(): + lockfile = fname.with_suffix(".lock") + + try: + with Lock(lockfile): + return _load() or _create() + except LockError: + logger.debug("Failed to acquire {lock}".format(lockfile)) + + +def _load(): + if not fname.exists: + return None + + with open(fname) as fobj: + try: + return json.load(fobj).get("user_id") + except json.JSONDecodeError: + pass + + +def _create(): + user_id = str(uuid.uuid4()) + + makedirs(fname.parent, exist_ok=True) + + with open(fname, "w") as fobj: + json.dump({"user_id": user_id}, fobj) + + return user_id diff --git a/dvc/command/daemon.py b/dvc/command/daemon.py index 2b60bded4e..3953ab706e 100644 --- a/dvc/command/daemon.py +++ b/dvc/command/daemon.py @@ -22,16 +22,6 @@ def run(self): return 0 -class CmdDaemonAnalytics(CmdDaemonBase): - def run(self): - from dvc.analytics import Analytics - - analytics = Analytics.load(self.args.target) - analytics.send() - - return 0 - - def add_parser(subparsers, parent_parser): DAEMON_HELP = "Service daemon." daemon_parser = subparsers.add_parser( @@ -55,13 +45,3 @@ def add_parser(subparsers, parent_parser): help=DAEMON_UPDATER_HELP, ) daemon_updater_parser.set_defaults(func=CmdDaemonUpdater) - - DAEMON_ANALYTICS_HELP = "Send dvc usage analytics." - daemon_analytics_parser = daemon_subparsers.add_parser( - "analytics", - parents=[parent_parser], - description=DAEMON_ANALYTICS_HELP, - help=DAEMON_ANALYTICS_HELP, - ) - daemon_analytics_parser.add_argument("target", help="Analytics file.") - daemon_analytics_parser.set_defaults(func=CmdDaemonAnalytics) diff --git a/dvc/config.py b/dvc/config.py index b883ef259b..8e9b11c2b0 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -13,8 +13,7 @@ from dvc.exceptions import DvcException from dvc.exceptions import NotDvcRepoError -from dvc.utils.compat import open -from dvc.utils.compat import str +from dvc.utils.compat import open, str, pathlib logger = logging.getLogger(__name__) @@ -285,8 +284,8 @@ def get_global_config_dir(): """ from appdirs import user_config_dir - return user_config_dir( - appname=Config.APPNAME, appauthor=Config.APPAUTHOR + return pathlib.Path( + user_config_dir(appname=Config.APPNAME, appauthor=Config.APPAUTHOR) ) @staticmethod @@ -298,8 +297,8 @@ def get_system_config_dir(): """ from appdirs import site_config_dir - return site_config_dir( - appname=Config.APPNAME, appauthor=Config.APPAUTHOR + return pathlib.Path( + site_config_dir(appname=Config.APPNAME, appauthor=Config.APPAUTHOR) ) @staticmethod diff --git a/dvc/lock.py b/dvc/lock.py index 147118c0fe..7ffe54fc34 100644 --- a/dvc/lock.py +++ b/dvc/lock.py @@ -10,7 +10,7 @@ from dvc.exceptions import DvcException from dvc.utils import makedirs -from dvc.utils.compat import is_py3 +from dvc.utils.compat import is_py3, str DEFAULT_TIMEOUT = 5 @@ -58,7 +58,7 @@ def __init__(self, lockfile, tmp_dir=None): # [1] https://github.com/iterative/dvc/issues/2582 self._hostname = socket.gethostname() - self._lockfile = lockfile + self._lockfile = str(lockfile) self._lifetime = timedelta(days=365) # Lock for good by default self._separator = flufl.lock.SEP self._set_claimfile() @@ -111,7 +111,7 @@ class Lock(object): """ def __init__(self, lockfile, tmp_dir=None): - self.lockfile = lockfile + self.lockfile = str(lockfile) self._lock = None @property diff --git a/dvc/main.py b/dvc/main.py index 660f9420d4..86e1a9e0df 100644 --- a/dvc/main.py +++ b/dvc/main.py @@ -3,7 +3,7 @@ import logging -from dvc.analytics import Analytics +from dvc import analytics from dvc.cli import parse_args from dvc.config import ConfigError from dvc.exceptions import DvcParserError @@ -81,6 +81,7 @@ def main(argv=None): if ret != 0: logger.info(FOOTER) - Analytics().send_cmd(cmd, args, ret) + if analytics.is_enabled(): + analytics.collect_and_send_report(args, ret) return ret diff --git a/dvc/repo/init.py b/dvc/repo/init.py index e06307e0a1..5df1d0f1f0 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -3,7 +3,7 @@ import colorama -from dvc.analytics import Analytics +from dvc import analytics from dvc.config import Config from dvc.exceptions import InitError from dvc.repo import Repo @@ -17,7 +17,7 @@ def _welcome_message(): - if Analytics.is_enabled(): + if analytics.is_enabled(): logger.info( boxify( "DVC has enabled anonymous aggregate usage analytics.\n" diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py deleted file mode 100644 index b42a117270..0000000000 --- a/tests/func/test_analytics.py +++ /dev/null @@ -1,65 +0,0 @@ -import os - -import mock -import requests - -from dvc.analytics import Analytics -from dvc.main import main -from tests.basic_env import TestDir -from tests.basic_env import TestDvc -from tests.basic_env import TestGit - - -def _clean_getenv(key, default=None): - """ - Remove env vars that affect dvc behavior in tests - """ - if key in ["DVC_TEST", "CI"]: - return None - return os.environ.get(key, default) - - -class TestAnalytics(TestDir): - def test(self): - a = Analytics() - a.collect() - self.assertTrue(isinstance(a.info, dict)) - self.assertNotEqual(a.info, {}) - self.assertTrue(a.PARAM_USER_ID in a.info.keys()) - self.assertTrue(a.PARAM_SYSTEM_INFO in a.info.keys()) - self.assertNotEqual(a.info[a.PARAM_SYSTEM_INFO], {}) - - @mock.patch.object(os, "getenv", new=_clean_getenv) - @mock.patch("requests.post") - def test_send(self, mockpost): - ret = main(["daemon", "analytics", Analytics().dump(), "-v"]) - self.assertEqual(ret, 0) - - self.assertTrue(mockpost.called) - - @mock.patch.object(os, "getenv", new=_clean_getenv) - @mock.patch.object( - requests, "post", side_effect=requests.exceptions.RequestException() - ) - def test_send_failed(self, mockpost): - ret = main(["daemon", "analytics", Analytics().dump(), "-v"]) - self.assertEqual(ret, 0) - - self.assertTrue(mockpost.called) - - -class TestAnalyticsGit(TestAnalytics, TestGit): - pass - - -class TestAnalyticsDvc(TestAnalytics, TestDvc): - @mock.patch("requests.post") - def test_send_disabled(self, mockpost): - ret = main(["config", "core.analytics", "false"]) - self.assertEqual(ret, 0) - - with mock.patch.object(os, "getenv", new=_clean_getenv): - ret = main(["daemon", "analytics", Analytics().dump(), "-v"]) - self.assertEqual(ret, 0) - - self.assertFalse(mockpost.called) diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 9e7e2dab14..2fbe134d49 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -1,6 +1,6 @@ import pytest -from dvc.analytics import Analytics +from dvc import analytics @pytest.mark.parametrize( @@ -21,4 +21,4 @@ def test_is_enabled(dvc_repo, config, result, monkeypatch): # reset DVC_TEST env var, which affects `is_enabled()` monkeypatch.delenv("DVC_TEST") - assert result == Analytics.is_enabled() + assert result == analytics.is_enabled() From b618d03cfd85ea03de564bbcba92556083374db1 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 25 Nov 2019 21:30:25 -0600 Subject: [PATCH 02/46] analytics: return OS when system info not supported --- dvc/analytics/system_info.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dvc/analytics/system_info.py b/dvc/analytics/system_info.py index 666324cfd3..6fe149ba08 100644 --- a/dvc/analytics/system_info.py +++ b/dvc/analytics/system_info.py @@ -27,3 +27,5 @@ def collect(): "linux_distro_like": distro.like(), "linux_distro_version": distro.version(), } + + return {"os": system.lower()} From 7d3cbd789cbe561a526f14420ca503b2ae8a3393 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 25 Nov 2019 21:37:03 -0600 Subject: [PATCH 03/46] analytics: collect system info under the same module --- dvc/analytics/__init__.py | 36 ++++++++++++++++++++++++++++++++++-- dvc/analytics/system_info.py | 31 ------------------------------- 2 files changed, 34 insertions(+), 33 deletions(-) delete mode 100644 dvc/analytics/system_info.py diff --git a/dvc/analytics/__init__.py b/dvc/analytics/__init__.py index 739ef253ea..0bb0c70b2a 100644 --- a/dvc/analytics/__init__.py +++ b/dvc/analytics/__init__.py @@ -1,11 +1,15 @@ import json import logging +import platform import requests import subprocess +import sys import tempfile +import distro + from dvc import __version__ -from dvc.analytics import user_id, system_info +from dvc.analytics import user_id from dvc.config import Config, to_bool from dvc.exceptions import NotDvcRepoError from dvc.repo import Repo @@ -23,7 +27,7 @@ def collect_and_send_report(arguments=None, exit_code=None): "dvc_version": __version__, "is_binary": is_binary(), "scm_class": scm_in_use(), - "system_info": system_info.collect(), + "system_info": system_info(), "user_id": user_id.find_or_create(), } @@ -62,3 +66,31 @@ def scm_in_use(): return type(scm).__name__ except NotDvcRepoError: pass + + +def system_info(): + system = platform.system() + + if system == "Windows": + version = sys.getwindowsversion() + + return { + "os": "windows", + "windows_version_build": version.build, + "windows_version_major": version.major, + "windows_version_minor": version.minor, + "windows_version_service_pack": version.service_pack, + } + + if system == "Darwin": + return {"os": "mac", "mac_version": platform.mac_ver()[0]} + + if system == "Linux": + return { + "os": "linux", + "linux_distro": distro.id(), + "linux_distro_like": distro.like(), + "linux_distro_version": distro.version(), + } + + return {"os": system.lower()} diff --git a/dvc/analytics/system_info.py b/dvc/analytics/system_info.py deleted file mode 100644 index 6fe149ba08..0000000000 --- a/dvc/analytics/system_info.py +++ /dev/null @@ -1,31 +0,0 @@ -import platform -import sys -import distro - - -def collect(): - system = platform.system() - - if system == "Windows": - version = sys.getwindowsversion() - - return { - "os": "windows", - "windows_version_build": version.build, - "windows_version_major": version.major, - "windows_version_minor": version.minor, - "windows_version_service_pack": version.service_pack, - } - - if system == "Darwing": - return {"os": "mac", "mac_version": platform.mac_ver()[0]} - - if system == "Linux": - return { - "os": "linux", - "linux_distro": distro.id(), - "linux_distro_like": distro.like(), - "linux_distro_version": distro.version(), - } - - return {"os": system.lower()} From 64d102caaa781caf3d08fc303c44e2d2c6880c9f Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 25 Nov 2019 21:58:08 -0600 Subject: [PATCH 04/46] analytics: move user_id module to a method shout-out to @Suor for the suggestion --- dvc/analytics/__init__.py | 36 ++++++++++++++++++++++--- dvc/analytics/user_id.py | 55 --------------------------------------- 2 files changed, 33 insertions(+), 58 deletions(-) delete mode 100644 dvc/analytics/user_id.py diff --git a/dvc/analytics/__init__.py b/dvc/analytics/__init__.py index 0bb0c70b2a..48bced7156 100644 --- a/dvc/analytics/__init__.py +++ b/dvc/analytics/__init__.py @@ -5,16 +5,17 @@ import subprocess import sys import tempfile +import uuid import distro from dvc import __version__ -from dvc.analytics import user_id from dvc.config import Config, to_bool from dvc.exceptions import NotDvcRepoError +from dvc.lock import Lock, LockError from dvc.repo import Repo from dvc.scm import SCM -from dvc.utils import env2bool, is_binary +from dvc.utils import env2bool, is_binary, makedirs logger = logging.getLogger(__name__) @@ -28,7 +29,7 @@ def collect_and_send_report(arguments=None, exit_code=None): "is_binary": is_binary(), "scm_class": scm_in_use(), "system_info": system_info(), - "user_id": user_id.find_or_create(), + "user_id": find_or_create_user_id(), } with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: @@ -94,3 +95,32 @@ def system_info(): } return {"os": system.lower()} + + +def find_or_create_user_id(): + """ + The user's ID is stored on a file under the global config directory. + + The file should contain a JSON with a "user_id" key: + + {"user_id": "16fd2706-8baf-433b-82eb-8c7fada847da"} + + IDs are generated randomly with UUID. + """ + config_dir = Config.get_global_config_dir() + fname = config_dir / "user_id" + lockfile = fname.with_suffix(".lock") + + try: + with Lock(lockfile): + try: + user_id = json.load(fname.read_text())["user_id"] + except (FileNotFoundError, json.JSONDecodeError, AttributeError): + user_id = str(uuid.uuid4()) + makedirs(fname.parent, exist_ok=True) + fname.write_text(json.dumps({"user_id": user_id})) + + return user_id + + except LockError: + logger.debug("Failed to acquire {lock}".format(lockfile)) diff --git a/dvc/analytics/user_id.py b/dvc/analytics/user_id.py deleted file mode 100644 index 4c360cd980..0000000000 --- a/dvc/analytics/user_id.py +++ /dev/null @@ -1,55 +0,0 @@ -""" -Interact with the user's ID stored under the global config directory. - -The file should contain a JSON with a "user_id" key: - - {"user_id": "16fd2706-8baf-433b-82eb-8c7fada847da"} - -IDs are generated randomly with UUID. -""" - -import json -import logging -import uuid - -from dvc.config import Config -from dvc.lock import Lock, LockError -from dvc.utils import makedirs - - -logger = logging.getLogger(__name__) - -config_dir = Config.get_global_config_dir() -fname = config_dir / "user_id" - - -def find_or_create(): - lockfile = fname.with_suffix(".lock") - - try: - with Lock(lockfile): - return _load() or _create() - except LockError: - logger.debug("Failed to acquire {lock}".format(lockfile)) - - -def _load(): - if not fname.exists: - return None - - with open(fname) as fobj: - try: - return json.load(fobj).get("user_id") - except json.JSONDecodeError: - pass - - -def _create(): - user_id = str(uuid.uuid4()) - - makedirs(fname.parent, exist_ok=True) - - with open(fname, "w") as fobj: - json.dump({"user_id": user_id}, fobj) - - return user_id From 2e7a6d3fe5ec7e0a6b23f332dd2de7831aa9f566 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 25 Nov 2019 21:59:41 -0600 Subject: [PATCH 05/46] :nail_care: sort methods --- dvc/analytics/__init__.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dvc/analytics/__init__.py b/dvc/analytics/__init__.py index 48bced7156..23211bb3fe 100644 --- a/dvc/analytics/__init__.py +++ b/dvc/analytics/__init__.py @@ -37,15 +37,6 @@ def collect_and_send_report(arguments=None, exit_code=None): subprocess.Popen(["python", "-m", "dvc.analytics", fobj.name]) -def send(path): - url = "https://analytics.dvc.org" - - with open(path) as fobj: - report = json.load(fobj) - - requests.post(url, json=report, timeout=5) - - def is_enabled(): if env2bool("DVC_TEST"): return False @@ -61,6 +52,15 @@ def is_enabled(): return enabled +def send(path): + url = "https://analytics.dvc.org" + + with open(path) as fobj: + report = json.load(fobj) + + requests.post(url, json=report, timeout=5) + + def scm_in_use(): try: scm = SCM(root_dir=Repo.find_root()) From 5cb437b5efccfe06c1e392d1193c8e486c799a3e Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 25 Nov 2019 22:03:54 -0600 Subject: [PATCH 06/46] analytics: move to single file shout-out to @Suor --- dvc/{analytics/__init__.py => analytics.py} | 0 dvc/analytics/__main__.py | 8 -------- 2 files changed, 8 deletions(-) rename dvc/{analytics/__init__.py => analytics.py} (100%) delete mode 100644 dvc/analytics/__main__.py diff --git a/dvc/analytics/__init__.py b/dvc/analytics.py similarity index 100% rename from dvc/analytics/__init__.py rename to dvc/analytics.py diff --git a/dvc/analytics/__main__.py b/dvc/analytics/__main__.py deleted file mode 100644 index 13ebfa53b8..0000000000 --- a/dvc/analytics/__main__.py +++ /dev/null @@ -1,8 +0,0 @@ -import sys - -from dvc.analytics import send - - -if __name__ == "__main__": - report_fname = sys.argv[1] - send(report_fname) From 610d1ee28e57d854fd80756e6ece536b0abd9a87 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 25 Nov 2019 22:10:32 -0600 Subject: [PATCH 07/46] analytics: go back to daemon implementation Lack of a better way to spawn a detached process on both windows an linux --- dvc/analytics.py | 11 ++++++++--- dvc/command/daemon.py | 23 ++++++++++++++++++++++- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 23211bb3fe..2a81907731 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -2,7 +2,6 @@ import logging import platform import requests -import subprocess import sys import tempfile import uuid @@ -11,17 +10,23 @@ from dvc import __version__ from dvc.config import Config, to_bool +from dvc.daemon import daemon from dvc.exceptions import NotDvcRepoError from dvc.lock import Lock, LockError from dvc.repo import Repo from dvc.scm import SCM from dvc.utils import env2bool, is_binary, makedirs - logger = logging.getLogger(__name__) def collect_and_send_report(arguments=None, exit_code=None): + """ + Query the system to fill a report and send it on a detached process. + + A temporary file is used as a mean of communication between the + current and detached process. + """ report = { "cmd_class": arguments.func.__name__, "cmd_return_code": exit_code, @@ -34,7 +39,7 @@ def collect_and_send_report(arguments=None, exit_code=None): with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: json.dump(report, fobj) - subprocess.Popen(["python", "-m", "dvc.analytics", fobj.name]) + daemon(["analytics", fobj.name]) def is_enabled(): diff --git a/dvc/command/daemon.py b/dvc/command/daemon.py index 3953ab706e..4fce0649bd 100644 --- a/dvc/command/daemon.py +++ b/dvc/command/daemon.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import os + from dvc.command.base import CmdBaseNoRepo from dvc.command.base import fix_subparsers @@ -10,7 +12,6 @@ class CmdDaemonBase(CmdBaseNoRepo): class CmdDaemonUpdater(CmdDaemonBase): def run(self): - import os from dvc.repo import Repo from dvc.updater import Updater @@ -22,6 +23,16 @@ def run(self): return 0 +class CmdDaemonAnalytics(CmdDaemonBase): + def run(self): + from dvc import analytics + + analytics.send(self.args.target) + os.path.remove(self.args.target) + + return 0 + + def add_parser(subparsers, parent_parser): DAEMON_HELP = "Service daemon." daemon_parser = subparsers.add_parser( @@ -45,3 +56,13 @@ def add_parser(subparsers, parent_parser): help=DAEMON_UPDATER_HELP, ) daemon_updater_parser.set_defaults(func=CmdDaemonUpdater) + + DAEMON_ANALYTICS_HELP = "Send dvc usage analytics." + daemon_analytics_parser = daemon_subparsers.add_parser( + "analytics", + parents=[parent_parser], + description=DAEMON_ANALYTICS_HELP, + help=DAEMON_ANALYTICS_HELP, + ) + daemon_analytics_parser.add_argument("target", help="Analytics file.") + daemon_analytics_parser.set_defaults(func=CmdDaemonAnalytics) From d958870edfddb49de17480172021063a5fc2c7fd Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 26 Nov 2019 10:45:19 -0600 Subject: [PATCH 08/46] analytics: exit_code -> return_code shout-out to @efiop for the suggestion --- dvc/analytics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 2a81907731..75ce5c58ce 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -20,7 +20,7 @@ logger = logging.getLogger(__name__) -def collect_and_send_report(arguments=None, exit_code=None): +def collect_and_send_report(arguments=None, return_code=None): """ Query the system to fill a report and send it on a detached process. @@ -29,7 +29,7 @@ def collect_and_send_report(arguments=None, exit_code=None): """ report = { "cmd_class": arguments.func.__name__, - "cmd_return_code": exit_code, + "cmd_return_code": return_code, "dvc_version": __version__, "is_binary": is_binary(), "scm_class": scm_in_use(), From 34d3004779e279e568762d15e974b10c848e4ee0 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 26 Nov 2019 12:07:18 -0600 Subject: [PATCH 09/46] :face_palm: fix wrong module --- dvc/command/daemon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/daemon.py b/dvc/command/daemon.py index 4fce0649bd..3f5f545a1c 100644 --- a/dvc/command/daemon.py +++ b/dvc/command/daemon.py @@ -28,7 +28,7 @@ def run(self): from dvc import analytics analytics.send(self.args.target) - os.path.remove(self.args.target) + os.remove(self.args.target) return 0 From 5261ed7148e8877fdf3518e96bc846f0eb542306 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 26 Nov 2019 13:40:11 -0600 Subject: [PATCH 10/46] tests: analytics.find_or_create_user_id --- dvc/analytics.py | 2 +- tests/func/test_analytics.py | 1 + tests/unit/test_analytics.py | 11 +++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 tests/func/test_analytics.py diff --git a/dvc/analytics.py b/dvc/analytics.py index 75ce5c58ce..e17b6291cf 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -119,7 +119,7 @@ def find_or_create_user_id(): try: with Lock(lockfile): try: - user_id = json.load(fname.read_text())["user_id"] + user_id = json.loads(fname.read_text())["user_id"] except (FileNotFoundError, json.JSONDecodeError, AttributeError): user_id = str(uuid.uuid4()) makedirs(fname.parent, exist_ok=True) diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/tests/func/test_analytics.py @@ -0,0 +1 @@ + diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 2fbe134d49..302bff3810 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -1,4 +1,5 @@ import pytest +import mock from dvc import analytics @@ -22,3 +23,13 @@ def test_is_enabled(dvc_repo, config, result, monkeypatch): monkeypatch.delenv("DVC_TEST") assert result == analytics.is_enabled() + + +def test_find_or_create_user_id(tmp_path): + with mock.patch( + "dvc.config.Config.get_global_config_dir", return_value=tmp_path + ): + created = analytics.find_or_create_user_id() + found = analytics.find_or_create_user_id() + + assert created == found From 467b110929a7816ed3ca01d5d05b8669fdd54011 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 26 Nov 2019 18:32:02 -0600 Subject: [PATCH 11/46] py2: FileNotFoundError -> IOError --- dvc/analytics.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index e17b6291cf..215c0c13f2 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -5,6 +5,7 @@ import sys import tempfile import uuid +import errno import distro @@ -120,7 +121,9 @@ def find_or_create_user_id(): with Lock(lockfile): try: user_id = json.loads(fname.read_text())["user_id"] - except (FileNotFoundError, json.JSONDecodeError, AttributeError): + except (IOError, json.JSONDecodeError, AttributeError) as exc: + if exc.errno != errno.ENOENT: + raise user_id = str(uuid.uuid4()) makedirs(fname.parent, exist_ok=True) fname.write_text(json.dumps({"user_id": user_id})) From 48b4cbad45f25589a1d5e9086a19119fd00cfad7 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 26 Nov 2019 18:32:39 -0600 Subject: [PATCH 12/46] :nail_care: change naming and docstring --- dvc/analytics.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 215c0c13f2..0bcf315270 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -18,18 +18,16 @@ from dvc.scm import SCM from dvc.utils import env2bool, is_binary, makedirs + logger = logging.getLogger(__name__) -def collect_and_send_report(arguments=None, return_code=None): +def collect_and_send_report(args=None, return_code=None): """ Query the system to fill a report and send it on a detached process. - - A temporary file is used as a mean of communication between the - current and detached process. """ report = { - "cmd_class": arguments.func.__name__, + "cmd_class": args.func.__name__ if hasattr(args, "func") else None, "cmd_return_code": return_code, "dvc_version": __version__, "is_binary": is_binary(), @@ -38,6 +36,8 @@ def collect_and_send_report(arguments=None, return_code=None): "user_id": find_or_create_user_id(), } + # A temporary file is used as a mean of communication between the + # current and detached process. with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: json.dump(report, fobj) daemon(["analytics", fobj.name]) From 6bc24039db8178dda1b77bf307ed22b079d725e6 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 26 Nov 2019 18:33:16 -0600 Subject: [PATCH 13/46] :nail_care: black --- tests/func/test_analytics.py | 1 - tests/unit/test_analytics.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 tests/func/test_analytics.py diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py deleted file mode 100644 index 8b13789179..0000000000 --- a/tests/func/test_analytics.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 302bff3810..5012d16172 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -32,4 +32,4 @@ def test_find_or_create_user_id(tmp_path): created = analytics.find_or_create_user_id() found = analytics.find_or_create_user_id() - assert created == found + assert created == found From f9952046fd376579fbb37906a402d9ab00a6fb2a Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 26 Nov 2019 19:29:04 -0600 Subject: [PATCH 14/46] tests: functional and unit tests for analytics --- dvc/analytics.py | 13 +++---------- dvc/main.py | 10 +++++++++- tests/func/test_analytics.py | 14 ++++++++++++++ tests/unit/test_analytics.py | 24 ++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 11 deletions(-) create mode 100644 tests/func/test_analytics.py diff --git a/dvc/analytics.py b/dvc/analytics.py index 0bcf315270..ebdfd9baf0 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -11,7 +11,6 @@ from dvc import __version__ from dvc.config import Config, to_bool -from dvc.daemon import daemon from dvc.exceptions import NotDvcRepoError from dvc.lock import Lock, LockError from dvc.repo import Repo @@ -22,11 +21,11 @@ logger = logging.getLogger(__name__) -def collect_and_send_report(args=None, return_code=None): +def collect(args=None, return_code=None): """ - Query the system to fill a report and send it on a detached process. + Query the system to fill a report. """ - report = { + return { "cmd_class": args.func.__name__ if hasattr(args, "func") else None, "cmd_return_code": return_code, "dvc_version": __version__, @@ -36,12 +35,6 @@ def collect_and_send_report(args=None, return_code=None): "user_id": find_or_create_user_id(), } - # A temporary file is used as a mean of communication between the - # current and detached process. - with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: - json.dump(report, fobj) - daemon(["analytics", fobj.name]) - def is_enabled(): if env2bool("DVC_TEST"): diff --git a/dvc/main.py b/dvc/main.py index 86e1a9e0df..566a46473a 100644 --- a/dvc/main.py +++ b/dvc/main.py @@ -2,10 +2,12 @@ from __future__ import unicode_literals import logging +import json from dvc import analytics from dvc.cli import parse_args from dvc.config import ConfigError +from dvc.daemon import daemon from dvc.exceptions import DvcParserError from dvc.exceptions import NotDvcRepoError from dvc.external_repo import clean_repos @@ -82,6 +84,12 @@ def main(argv=None): logger.info(FOOTER) if analytics.is_enabled(): - analytics.collect_and_send_report(args, ret) + report = analytics.collect(args, ret) + + # Use a temporary file to pass the report between the current and + # detached process. + with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: + json.dump(report, fobj) + daemon(["analytics", fobj.name]) return ret diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py new file mode 100644 index 0000000000..608fea00be --- /dev/null +++ b/tests/func/test_analytics.py @@ -0,0 +1,14 @@ +import mock +import json + +from dvc.main import main + + +@mock.patch("dvc.analytics.send") +def test_daemon_analytics(mock_send, tmp_path): + report = {"name": "dummy report"} + fname = tmp_path / "report" + fname.write_text(json.dumps(report)) + + assert 0 == main(["daemon", "analytics", str(fname)]) + assert mock_send.called diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 5012d16172..3800cf0efa 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -1,9 +1,33 @@ +import json import pytest import mock from dvc import analytics +def test_collect(): + report = analytics.collect(return_code=0) + + assert report["cmd_class"] == None + assert report["cmd_return_code"] == 0 + assert report["scm_class"] in ["Git", None] + assert type(report["is_binary"]) is bool + assert type(report["system_info"]) is dict + assert type(report["dvc_version"]) is str + assert type(report["user_id"]) is str + + +@mock.patch("requests.post") +def test_send(mock_post, tmp_path): + url = "https://analytics.dvc.org" + report = {"name": "dummy report"} + fname = tmp_path / "report" + + fname.write_text(json.dumps(report)) + analytics.send(fname) + mock_post.assert_called_with(url, json=report, timeout=5) + + @pytest.mark.parametrize( "config, result", [ From 2a5cac55700878a68f31e1e18ab854ed9a82e47d Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 26 Nov 2019 19:32:26 -0600 Subject: [PATCH 15/46] :nail_care: deepsource --- dvc/analytics.py | 3 +-- dvc/main.py | 1 + tests/unit/test_analytics.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index ebdfd9baf0..4af122a9a9 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -3,7 +3,6 @@ import platform import requests import sys -import tempfile import uuid import errno @@ -124,4 +123,4 @@ def find_or_create_user_id(): return user_id except LockError: - logger.debug("Failed to acquire {lock}".format(lockfile)) + logger.debug("Failed to acquire {lockfile}".format(lockfile=lockfile)) diff --git a/dvc/main.py b/dvc/main.py index 566a46473a..07080ae60c 100644 --- a/dvc/main.py +++ b/dvc/main.py @@ -3,6 +3,7 @@ import logging import json +import tempfile from dvc import analytics from dvc.cli import parse_args diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 3800cf0efa..e8acf1c5f6 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -8,9 +8,9 @@ def test_collect(): report = analytics.collect(return_code=0) - assert report["cmd_class"] == None assert report["cmd_return_code"] == 0 assert report["scm_class"] in ["Git", None] + assert not report["cmd_class"] assert type(report["is_binary"]) is bool assert type(report["system_info"]) is dict assert type(report["dvc_version"]) is str From e9d56a969e9851d07b3c6a7e7a318970d701a418 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 26 Nov 2019 19:39:20 -0600 Subject: [PATCH 16/46] :nail_care: sort imports --- dvc/analytics.py | 2 +- dvc/main.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 4af122a9a9..ac0a0a798b 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -1,10 +1,10 @@ +import errno import json import logging import platform import requests import sys import uuid -import errno import distro diff --git a/dvc/main.py b/dvc/main.py index 07080ae60c..2b74fe6e9b 100644 --- a/dvc/main.py +++ b/dvc/main.py @@ -1,8 +1,8 @@ """Main entry point for dvc CLI.""" from __future__ import unicode_literals -import logging import json +import logging import tempfile from dvc import analytics From 787ea9cd31993458d0380f05bcf0a68bf68dde09 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 27 Nov 2019 12:37:11 -0600 Subject: [PATCH 17/46] tests: set temporary global config --- tests/unit/test_analytics.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index e8acf1c5f6..2c443095ce 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -5,7 +5,15 @@ from dvc import analytics -def test_collect(): +@pytest.fixture +def tmp_global_config(tmp_path): + with mock.patch( + "dvc.config.Config.get_global_config_dir", return_value=tmp_path + ): + yield + + +def test_collect(tmp_global_config): report = analytics.collect(return_code=0) assert report["cmd_return_code"] == 0 @@ -49,11 +57,8 @@ def test_is_enabled(dvc_repo, config, result, monkeypatch): assert result == analytics.is_enabled() -def test_find_or_create_user_id(tmp_path): - with mock.patch( - "dvc.config.Config.get_global_config_dir", return_value=tmp_path - ): - created = analytics.find_or_create_user_id() - found = analytics.find_or_create_user_id() +def test_find_or_create_user_id(tmp_global_config): + created = analytics.find_or_create_user_id() + found = analytics.find_or_create_user_id() assert created == found From e69fffa7c74b5878dbbe257b74cb0cdee5ea0776 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 27 Nov 2019 18:36:53 -0600 Subject: [PATCH 18/46] py2 compat issues :shrug: --- dvc/analytics.py | 10 ++++------ dvc/config.py | 9 ++------- tests/func/test_analytics.py | 3 ++- tests/unit/test_analytics.py | 5 +++-- 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index ac0a0a798b..556dafe572 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -1,4 +1,3 @@ -import errno import json import logging import platform @@ -15,6 +14,7 @@ from dvc.repo import Repo from dvc.scm import SCM from dvc.utils import env2bool, is_binary, makedirs +from dvc.utils.compat import str, FileNotFoundError logger = logging.getLogger(__name__) @@ -53,7 +53,7 @@ def is_enabled(): def send(path): url = "https://analytics.dvc.org" - with open(path) as fobj: + with open(str(path)) as fobj: report = json.load(fobj) requests.post(url, json=report, timeout=5) @@ -113,12 +113,10 @@ def find_or_create_user_id(): with Lock(lockfile): try: user_id = json.loads(fname.read_text())["user_id"] - except (IOError, json.JSONDecodeError, AttributeError) as exc: - if exc.errno != errno.ENOENT: - raise + except (FileNotFoundError, ValueError, AttributeError): user_id = str(uuid.uuid4()) makedirs(fname.parent, exist_ok=True) - fname.write_text(json.dumps({"user_id": user_id})) + fname.write_text(str(json.dumps({"user_id": user_id}))) return user_id diff --git a/dvc/config.py b/dvc/config.py index 8e9b11c2b0..76de6cf84b 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -342,13 +342,8 @@ def _resolve_paths(self, config): return ret def _load_configs(self): - system_config_file = os.path.join( - self.get_system_config_dir(), self.CONFIG - ) - - global_config_file = os.path.join( - self.get_global_config_dir(), self.CONFIG - ) + system_config_file = str(self.get_system_config_dir() / self.CONFIG) + global_config_file = str(self.get_global_config_dir() / self.CONFIG) self._system_config = configobj.ConfigObj(system_config_file) self._global_config = configobj.ConfigObj(global_config_file) diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py index 608fea00be..bd698703df 100644 --- a/tests/func/test_analytics.py +++ b/tests/func/test_analytics.py @@ -2,13 +2,14 @@ import json from dvc.main import main +from dvc.utils.compat import str @mock.patch("dvc.analytics.send") def test_daemon_analytics(mock_send, tmp_path): report = {"name": "dummy report"} fname = tmp_path / "report" - fname.write_text(json.dumps(report)) + fname.write_text(str(json.dumps(report))) assert 0 == main(["daemon", "analytics", str(fname)]) assert mock_send.called diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 2c443095ce..b0f3affaae 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -3,6 +3,7 @@ import mock from dvc import analytics +from dvc.utils.compat import builtin_str, str @pytest.fixture @@ -21,7 +22,7 @@ def test_collect(tmp_global_config): assert not report["cmd_class"] assert type(report["is_binary"]) is bool assert type(report["system_info"]) is dict - assert type(report["dvc_version"]) is str + assert type(report["dvc_version"]) is builtin_str assert type(report["user_id"]) is str @@ -31,7 +32,7 @@ def test_send(mock_post, tmp_path): report = {"name": "dummy report"} fname = tmp_path / "report" - fname.write_text(json.dumps(report)) + fname.write_text(str(json.dumps(report))) analytics.send(fname) mock_post.assert_called_with(url, json=report, timeout=5) From 656f986542afe332d5102d9a54e38387139e61d2 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Thu, 28 Nov 2019 17:47:14 -0600 Subject: [PATCH 19/46] :nail_care: correct wording "disenabled" -> "disabled" --- dvc/analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 556dafe572..a4c321a188 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -45,7 +45,7 @@ def is_enabled(): .get(Config.SECTION_CORE_ANALYTICS, "true") ) - logger.debug("Analytics is {}enabled.".format("" if enabled else "dis")) + logger.debug("Analytics is {}abled.".format("en" if enabled else "dis")) return enabled From 946dfc14004a1dd17241c30795e48d36fce4c615 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 2 Dec 2019 17:35:21 -0600 Subject: [PATCH 20/46] analytics: send report without loading it to memory shout-out to @Suor for the suggestion --- dvc/analytics.py | 5 ++--- tests/unit/test_analytics.py | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index a4c321a188..d658c91917 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -52,11 +52,10 @@ def is_enabled(): def send(path): url = "https://analytics.dvc.org" + headers = {"content-type": "application/json"} with open(str(path)) as fobj: - report = json.load(fobj) - - requests.post(url, json=report, timeout=5) + requests.post(url, data=fobj, headers=headers, timeout=5) def scm_in_use(): diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index b0f3affaae..e48e4972d5 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -34,7 +34,8 @@ def test_send(mock_post, tmp_path): fname.write_text(str(json.dumps(report))) analytics.send(fname) - mock_post.assert_called_with(url, json=report, timeout=5) + assert mock_post.called + assert mock_post.call_args.args[0] == url @pytest.mark.parametrize( From 55960154c54338dbe68e547a3df39b7927f96175 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 2 Dec 2019 17:35:50 -0600 Subject: [PATCH 21/46] analytics: document why tmp_global_config is needed --- tests/unit/test_analytics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index e48e4972d5..b54b8132e6 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -8,6 +8,9 @@ @pytest.fixture def tmp_global_config(tmp_path): + """ + Fixture to prevent modifying the actual global config + """ with mock.patch( "dvc.config.Config.get_global_config_dir", return_value=tmp_path ): From 2c8e1495b86e8cce017a831f9d87df016a421658 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 2 Dec 2019 21:27:31 -0600 Subject: [PATCH 22/46] analytics: use fspath instead of str --- dvc/analytics.py | 4 ++-- dvc/config.py | 6 +++--- dvc/lock.py | 6 +++--- tests/func/test_analytics.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index d658c91917..7939ba5c3e 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -14,7 +14,7 @@ from dvc.repo import Repo from dvc.scm import SCM from dvc.utils import env2bool, is_binary, makedirs -from dvc.utils.compat import str, FileNotFoundError +from dvc.utils.compat import str, FileNotFoundError, fspath_py35 logger = logging.getLogger(__name__) @@ -54,7 +54,7 @@ def send(path): url = "https://analytics.dvc.org" headers = {"content-type": "application/json"} - with open(str(path)) as fobj: + with open(fspath_py35(path)) as fobj: requests.post(url, data=fobj, headers=headers, timeout=5) diff --git a/dvc/config.py b/dvc/config.py index 76de6cf84b..32c7a0977e 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -13,7 +13,7 @@ from dvc.exceptions import DvcException from dvc.exceptions import NotDvcRepoError -from dvc.utils.compat import open, str, pathlib +from dvc.utils.compat import open, str, pathlib, fspath logger = logging.getLogger(__name__) @@ -342,8 +342,8 @@ def _resolve_paths(self, config): return ret def _load_configs(self): - system_config_file = str(self.get_system_config_dir() / self.CONFIG) - global_config_file = str(self.get_global_config_dir() / self.CONFIG) + system_config_file = fspath(self.get_system_config_dir() / self.CONFIG) + global_config_file = fspath(self.get_global_config_dir() / self.CONFIG) self._system_config = configobj.ConfigObj(system_config_file) self._global_config = configobj.ConfigObj(global_config_file) diff --git a/dvc/lock.py b/dvc/lock.py index 7ffe54fc34..1f3df8ebb4 100644 --- a/dvc/lock.py +++ b/dvc/lock.py @@ -10,7 +10,7 @@ from dvc.exceptions import DvcException from dvc.utils import makedirs -from dvc.utils.compat import is_py3, str +from dvc.utils.compat import is_py3, str, fspath DEFAULT_TIMEOUT = 5 @@ -58,7 +58,7 @@ def __init__(self, lockfile, tmp_dir=None): # [1] https://github.com/iterative/dvc/issues/2582 self._hostname = socket.gethostname() - self._lockfile = str(lockfile) + self._lockfile = fspath(lockfile) self._lifetime = timedelta(days=365) # Lock for good by default self._separator = flufl.lock.SEP self._set_claimfile() @@ -111,7 +111,7 @@ class Lock(object): """ def __init__(self, lockfile, tmp_dir=None): - self.lockfile = str(lockfile) + self.lockfile = fspath(lockfile) self._lock = None @property diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py index bd698703df..97a71d6d0d 100644 --- a/tests/func/test_analytics.py +++ b/tests/func/test_analytics.py @@ -2,7 +2,7 @@ import json from dvc.main import main -from dvc.utils.compat import str +from dvc.utils.compat import fspath @mock.patch("dvc.analytics.send") @@ -11,5 +11,5 @@ def test_daemon_analytics(mock_send, tmp_path): fname = tmp_path / "report" fname.write_text(str(json.dumps(report))) - assert 0 == main(["daemon", "analytics", str(fname)]) + assert 0 == main(["daemon", "analytics", fspath(fname)]) assert mock_send.called From ca485db24040587074f4dd7d4b101fe6ab585236 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 2 Dec 2019 21:36:57 -0600 Subject: [PATCH 23/46] analytics: define report schema and use it on tests --- dvc/analytics.py | 13 +++++++++++++ tests/unit/test_analytics.py | 9 +-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 7939ba5c3e..2d0d328e7c 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -6,6 +6,7 @@ import uuid import distro +from voluptuous import Schema, Any from dvc import __version__ from dvc.config import Config, to_bool @@ -19,6 +20,18 @@ logger = logging.getLogger(__name__) +report_schema = Schema( + { + "cmd_class": Any(str, None), + "cmd_return_code": Any(int, None), + "dvc_version": str, + "is_binary": bool, + "scm_class": Any("Git", None), + "user_id": str, + "system_info": dict, + } +) + def collect(args=None, return_code=None): """ diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index b54b8132e6..953a681a3e 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -19,14 +19,7 @@ def tmp_global_config(tmp_path): def test_collect(tmp_global_config): report = analytics.collect(return_code=0) - - assert report["cmd_return_code"] == 0 - assert report["scm_class"] in ["Git", None] - assert not report["cmd_class"] - assert type(report["is_binary"]) is bool - assert type(report["system_info"]) is dict - assert type(report["dvc_version"]) is builtin_str - assert type(report["user_id"]) is str + assert analytics.report_schema(report) @mock.patch("requests.post") From e0d47a138221a78a8dd05dbf7ec9927c0312baed Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 2 Dec 2019 21:57:06 -0600 Subject: [PATCH 24/46] :nail_care: formatting --- dvc/analytics.py | 6 +++--- dvc/lock.py | 2 +- tests/unit/test_analytics.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 2d0d328e7c..4a4a4060bc 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -15,7 +15,7 @@ from dvc.repo import Repo from dvc.scm import SCM from dvc.utils import env2bool, is_binary, makedirs -from dvc.utils.compat import str, FileNotFoundError, fspath_py35 +from dvc.utils.compat import builtin_str, str, FileNotFoundError, fspath_py35 logger = logging.getLogger(__name__) @@ -24,10 +24,10 @@ { "cmd_class": Any(str, None), "cmd_return_code": Any(int, None), - "dvc_version": str, + "dvc_version": Any(builtin_str, str), "is_binary": bool, "scm_class": Any("Git", None), - "user_id": str, + "user_id": Any(builtin_str, str), "system_info": dict, } ) diff --git a/dvc/lock.py b/dvc/lock.py index 1f3df8ebb4..d4925e9638 100644 --- a/dvc/lock.py +++ b/dvc/lock.py @@ -10,7 +10,7 @@ from dvc.exceptions import DvcException from dvc.utils import makedirs -from dvc.utils.compat import is_py3, str, fspath +from dvc.utils.compat import is_py3, fspath DEFAULT_TIMEOUT = 5 diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 953a681a3e..e62c6e7b5b 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -3,7 +3,7 @@ import mock from dvc import analytics -from dvc.utils.compat import builtin_str, str +from dvc.utils.compat import str @pytest.fixture From 8b5ca253ec376aa3be484ba376236a218ba38368 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 2 Dec 2019 22:47:31 -0600 Subject: [PATCH 25/46] analytics: move report schema to tests --- dvc/analytics.py | 15 +-------------- tests/unit/test_analytics.py | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 4a4a4060bc..7939ba5c3e 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -6,7 +6,6 @@ import uuid import distro -from voluptuous import Schema, Any from dvc import __version__ from dvc.config import Config, to_bool @@ -15,23 +14,11 @@ from dvc.repo import Repo from dvc.scm import SCM from dvc.utils import env2bool, is_binary, makedirs -from dvc.utils.compat import builtin_str, str, FileNotFoundError, fspath_py35 +from dvc.utils.compat import str, FileNotFoundError, fspath_py35 logger = logging.getLogger(__name__) -report_schema = Schema( - { - "cmd_class": Any(str, None), - "cmd_return_code": Any(int, None), - "dvc_version": Any(builtin_str, str), - "is_binary": bool, - "scm_class": Any("Git", None), - "user_id": Any(builtin_str, str), - "system_info": dict, - } -) - def collect(args=None, return_code=None): """ diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index e62c6e7b5b..7a864bfa3c 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -2,8 +2,10 @@ import pytest import mock +from voluptuous import Schema, Any + from dvc import analytics -from dvc.utils.compat import str +from dvc.utils.compat import str, builtin_str @pytest.fixture @@ -18,8 +20,20 @@ def tmp_global_config(tmp_path): def test_collect(tmp_global_config): + schema = Schema( + { + "cmd_class": Any(str, None), + "cmd_return_code": Any(int, None), + "dvc_version": Any(builtin_str, str), + "is_binary": bool, + "scm_class": Any("Git", None), + "user_id": Any(builtin_str, str), + "system_info": dict, + } + ) + report = analytics.collect(return_code=0) - assert analytics.report_schema(report) + assert schema(report) @mock.patch("requests.post") From 8a32b28b543abb0eae550e0f28fefe6ec1cc06d0 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 2 Dec 2019 22:54:16 -0600 Subject: [PATCH 26/46] analytics: collect and send on daemon --- dvc/analytics.py | 13 +++++-------- dvc/command/daemon.py | 14 ++++++++++---- dvc/main.py | 12 +++--------- tests/func/test_analytics.py | 8 +------- tests/unit/test_analytics.py | 8 ++------ 5 files changed, 21 insertions(+), 34 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 7939ba5c3e..6ea473571b 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -14,18 +14,18 @@ from dvc.repo import Repo from dvc.scm import SCM from dvc.utils import env2bool, is_binary, makedirs -from dvc.utils.compat import str, FileNotFoundError, fspath_py35 +from dvc.utils.compat import str, FileNotFoundError logger = logging.getLogger(__name__) -def collect(args=None, return_code=None): +def collect(cmd_class=None, return_code=None): """ Query the system to fill a report. """ return { - "cmd_class": args.func.__name__ if hasattr(args, "func") else None, + "cmd_class": cmd_class, "cmd_return_code": return_code, "dvc_version": __version__, "is_binary": is_binary(), @@ -50,12 +50,9 @@ def is_enabled(): return enabled -def send(path): +def send(report): url = "https://analytics.dvc.org" - headers = {"content-type": "application/json"} - - with open(fspath_py35(path)) as fobj: - requests.post(url, data=fobj, headers=headers, timeout=5) + requests.post(url, json=report, timeout=5) def scm_in_use(): diff --git a/dvc/command/daemon.py b/dvc/command/daemon.py index 3f5f545a1c..6bcb15f549 100644 --- a/dvc/command/daemon.py +++ b/dvc/command/daemon.py @@ -25,10 +25,11 @@ def run(self): class CmdDaemonAnalytics(CmdDaemonBase): def run(self): + """Collect and send analytics""" from dvc import analytics - analytics.send(self.args.target) - os.remove(self.args.target) + report = analytics.collect(self.args.cmd_class, self.args.ret) + analytics.send(report) return 0 @@ -57,12 +58,17 @@ def add_parser(subparsers, parent_parser): ) daemon_updater_parser.set_defaults(func=CmdDaemonUpdater) - DAEMON_ANALYTICS_HELP = "Send dvc usage analytics." + DAEMON_ANALYTICS_HELP = "Collect and send dvc usage analytics." daemon_analytics_parser = daemon_subparsers.add_parser( "analytics", parents=[parent_parser], description=DAEMON_ANALYTICS_HELP, help=DAEMON_ANALYTICS_HELP, ) - daemon_analytics_parser.add_argument("target", help="Analytics file.") + daemon_analytics_parser.add_argument( + "cmd_class", help="Class called through main" + ) + daemon_analytics_parser.add_argument( + "ret", help="Return code from running such class" + ) daemon_analytics_parser.set_defaults(func=CmdDaemonAnalytics) diff --git a/dvc/main.py b/dvc/main.py index 2b74fe6e9b..c88661714e 100644 --- a/dvc/main.py +++ b/dvc/main.py @@ -1,9 +1,7 @@ """Main entry point for dvc CLI.""" from __future__ import unicode_literals -import json import logging -import tempfile from dvc import analytics from dvc.cli import parse_args @@ -85,12 +83,8 @@ def main(argv=None): logger.info(FOOTER) if analytics.is_enabled(): - report = analytics.collect(args, ret) - - # Use a temporary file to pass the report between the current and - # detached process. - with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: - json.dump(report, fobj) - daemon(["analytics", fobj.name]) + # Include the command class and return code on the analytics report + cmd_class = args.func.__name__ if hasattr(args, "func") else None + daemon(["analytics", cmd_class, ret]) return ret diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py index 97a71d6d0d..31d62d56c4 100644 --- a/tests/func/test_analytics.py +++ b/tests/func/test_analytics.py @@ -1,15 +1,9 @@ import mock -import json from dvc.main import main -from dvc.utils.compat import fspath @mock.patch("dvc.analytics.send") def test_daemon_analytics(mock_send, tmp_path): - report = {"name": "dummy report"} - fname = tmp_path / "report" - fname.write_text(str(json.dumps(report))) - - assert 0 == main(["daemon", "analytics", fspath(fname)]) + assert 0 == main(["daemon", "analytics", None, 0]) assert mock_send.called diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 7a864bfa3c..c568987211 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -1,4 +1,3 @@ -import json import pytest import mock @@ -40,12 +39,9 @@ def test_collect(tmp_global_config): def test_send(mock_post, tmp_path): url = "https://analytics.dvc.org" report = {"name": "dummy report"} - fname = tmp_path / "report" - fname.write_text(str(json.dumps(report))) - analytics.send(fname) - assert mock_post.called - assert mock_post.call_args.args[0] == url + analytics.send(report) + mock_post.assert_called_with(url, json=report, timeout=5) @pytest.mark.parametrize( From 624cbb2d91a9b16a56e6e5dd56c355505661146f Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Mon, 2 Dec 2019 23:46:04 -0600 Subject: [PATCH 27/46] :nail_care: more specific comment about analytics --- dvc/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dvc/main.py b/dvc/main.py index c88661714e..9d6469a73f 100644 --- a/dvc/main.py +++ b/dvc/main.py @@ -83,7 +83,8 @@ def main(argv=None): logger.info(FOOTER) if analytics.is_enabled(): - # Include the command class and return code on the analytics report + # Collect and send analytics report in a separate process, + # including the command class and return code on the report cmd_class = args.func.__name__ if hasattr(args, "func") else None daemon(["analytics", cmd_class, ret]) From 285620d96e16bb8be5a359383d630a4a770c3059 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 3 Dec 2019 09:41:23 -0600 Subject: [PATCH 28/46] tests: mock analytics.collect while testing daemon --- tests/func/test_analytics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py index 31d62d56c4..e80964038d 100644 --- a/tests/func/test_analytics.py +++ b/tests/func/test_analytics.py @@ -3,7 +3,9 @@ from dvc.main import main +@mock.patch("dvc.analytics.collect") @mock.patch("dvc.analytics.send") -def test_daemon_analytics(mock_send, tmp_path): +def test_daemon_analytics(mock_collect, mock_send, tmp_path): assert 0 == main(["daemon", "analytics", None, 0]) + assert mock_collect.called assert mock_send.called From 02eaa9a14fa2994aa04936ae0d4c692c4d9067be Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Tue, 3 Dec 2019 18:38:11 -0600 Subject: [PATCH 29/46] py35: support for fspath(pathlib.Path()) --- dvc/utils/compat.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dvc/utils/compat.py b/dvc/utils/compat.py index 372c8d2c60..ab079c1d08 100644 --- a/dvc/utils/compat.py +++ b/dvc/utils/compat.py @@ -207,6 +207,12 @@ def fspath(path): except AttributeError: if hasattr(path_type, "__fspath__"): raise + + # Support for pathlib.Path in Python 3.5, since it doesn't + # implement `__fspath__` + if isinstance(path, pathlib.Path): + return str(path) + else: raise TypeError( "expected str, bytes or os.PathLike object, " From ede010384b24a925a8fe003bfb8b8bec7fd5238d Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 4 Dec 2019 18:36:59 -0600 Subject: [PATCH 30/46] py2: use convert_to_unicode instead of str --- dvc/analytics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 6ea473571b..1da3789325 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -15,6 +15,7 @@ from dvc.scm import SCM from dvc.utils import env2bool, is_binary, makedirs from dvc.utils.compat import str, FileNotFoundError +from dvc.utils.compat import str, FileNotFoundError, convert_to_unicode logger = logging.getLogger(__name__) @@ -112,7 +113,8 @@ def find_or_create_user_id(): except (FileNotFoundError, ValueError, AttributeError): user_id = str(uuid.uuid4()) makedirs(fname.parent, exist_ok=True) - fname.write_text(str(json.dumps({"user_id": user_id}))) + data = convert_to_unicode(json.dumps({"user_id": user_id})) + fname.write_text(data) return user_id From 7a3aae1c7e8a5b276912e7d3f280e47b1c542c03 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 4 Dec 2019 19:43:00 -0600 Subject: [PATCH 31/46] tests: add unit test for analytics.system_info --- tests/unit/test_analytics.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index c568987211..4a2388c950 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -1,5 +1,7 @@ import pytest import mock +import platform +import json from voluptuous import Schema, Any @@ -65,6 +67,36 @@ def test_is_enabled(dvc_repo, config, result, monkeypatch): assert result == analytics.is_enabled() +def test_system_info(): + schema = Schema({"os": Any("windows", "mac", "linux")}) + + system = platform.system() + + if system == "Windows": + schema = schema.extend( + { + "windows_version_build": int, + "windows_version_major": int, + "windows_version_minor": int, + "windows_version_service_pack": str, + } + ) + + if system == "Darwin": + schema = schema.extend({"mac_version": str}) + + if system == "Linux": + schema = schema.extend( + { + "linux_distro": str, + "linux_distro_like": Any(str, None), + "linux_distro_version": Any(str, None), + } + ) + + assert schema(analytics.system_info()) + + def test_find_or_create_user_id(tmp_global_config): created = analytics.find_or_create_user_id() found = analytics.find_or_create_user_id() From 05a68684fd1780893338b78c25033925a319dc21 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 4 Dec 2019 19:44:12 -0600 Subject: [PATCH 32/46] tests: isolate global config from analytics tests fix #2806 --- tests/unit/test_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 4a2388c950..b749893813 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -56,7 +56,7 @@ def test_send(mock_post, tmp_path): ({"analytics": "false", "unknown": "broken"}, False), ], ) -def test_is_enabled(dvc_repo, config, result, monkeypatch): +def test_is_enabled(dvc_repo, config, result, monkeypatch, tmp_global_config): configobj = dvc_repo.config._repo_config configobj["core"] = config configobj.write() From 71ead3f62e5f3f559b04cd776dd48c638ce63748 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Wed, 4 Dec 2019 19:46:40 -0600 Subject: [PATCH 33/46] analytics: use a tempfile for inter-process communication --- dvc/analytics.py | 41 ++++++++++++++++++++++++++++++------ dvc/command/daemon.py | 14 +++++------- dvc/main.py | 6 +----- tests/func/test_analytics.py | 23 +++++++++++++++----- tests/unit/test_analytics.py | 18 +++++++++------- 5 files changed, 69 insertions(+), 33 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 1da3789325..6501ece8f9 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -3,31 +3,57 @@ import platform import requests import sys +import tempfile import uuid import distro from dvc import __version__ from dvc.config import Config, to_bool +from dvc.daemon import daemon from dvc.exceptions import NotDvcRepoError from dvc.lock import Lock, LockError from dvc.repo import Repo from dvc.scm import SCM from dvc.utils import env2bool, is_binary, makedirs -from dvc.utils.compat import str, FileNotFoundError from dvc.utils.compat import str, FileNotFoundError, convert_to_unicode logger = logging.getLogger(__name__) -def collect(cmd_class=None, return_code=None): +def collect_and_send_report(args=None, return_code=None): """ - Query the system to fill a report. + Collect information from the runtime/environment and the command + being executed into a report and send it over the network. + + To prevent analytics from blocking the execution of the main thread, + sending the report is done in a separate process. + + The inter-process communication happens through a file containing the + report as a JSON, where the _collector_ generates it and the _sender_ + removes it after sending it. + """ + report = runtime_info() + + # Include command execution information on the report + report.update( + { + "cmd_class": args.func.__name__ if hasattr(args, "func") else None, + "cmd_return_code": return_code, + } + ) + + with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: + json.dump(report, fobj) + daemon(["analytics", fobj.name]) + + +def runtime_info(): + """ + Gather information from the environment where DVC runs to fill a report. """ return { - "cmd_class": cmd_class, - "cmd_return_code": return_code, "dvc_version": __version__, "is_binary": is_binary(), "scm_class": scm_in_use(), @@ -53,7 +79,10 @@ def is_enabled(): def send(report): url = "https://analytics.dvc.org" - requests.post(url, json=report, timeout=5) + headers = {"content-type": "application/json"} + + with open(report, "rb") as fobj: + requests.post(url, data=fobj, headers=headers, timeout=5) def scm_in_use(): diff --git a/dvc/command/daemon.py b/dvc/command/daemon.py index 6bcb15f549..68d4b36f6d 100644 --- a/dvc/command/daemon.py +++ b/dvc/command/daemon.py @@ -25,11 +25,12 @@ def run(self): class CmdDaemonAnalytics(CmdDaemonBase): def run(self): - """Collect and send analytics""" from dvc import analytics - report = analytics.collect(self.args.cmd_class, self.args.ret) + report = self.args.target + analytics.send(report) + os.remove(report) return 0 @@ -58,17 +59,12 @@ def add_parser(subparsers, parent_parser): ) daemon_updater_parser.set_defaults(func=CmdDaemonUpdater) - DAEMON_ANALYTICS_HELP = "Collect and send dvc usage analytics." + DAEMON_ANALYTICS_HELP = "Send dvc usage analytics." daemon_analytics_parser = daemon_subparsers.add_parser( "analytics", parents=[parent_parser], description=DAEMON_ANALYTICS_HELP, help=DAEMON_ANALYTICS_HELP, ) - daemon_analytics_parser.add_argument( - "cmd_class", help="Class called through main" - ) - daemon_analytics_parser.add_argument( - "ret", help="Return code from running such class" - ) + daemon_analytics_parser.add_argument("target", help="Analytics file.") daemon_analytics_parser.set_defaults(func=CmdDaemonAnalytics) diff --git a/dvc/main.py b/dvc/main.py index 9d6469a73f..86e1a9e0df 100644 --- a/dvc/main.py +++ b/dvc/main.py @@ -6,7 +6,6 @@ from dvc import analytics from dvc.cli import parse_args from dvc.config import ConfigError -from dvc.daemon import daemon from dvc.exceptions import DvcParserError from dvc.exceptions import NotDvcRepoError from dvc.external_repo import clean_repos @@ -83,9 +82,6 @@ def main(argv=None): logger.info(FOOTER) if analytics.is_enabled(): - # Collect and send analytics report in a separate process, - # including the command class and return code on the report - cmd_class = args.func.__name__ if hasattr(args, "func") else None - daemon(["analytics", cmd_class, ret]) + analytics.collect_and_send_report(args, ret) return ret diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py index e80964038d..df7454965b 100644 --- a/tests/func/test_analytics.py +++ b/tests/func/test_analytics.py @@ -1,11 +1,24 @@ import mock from dvc.main import main +from dvc.utils.compat import fspath -@mock.patch("dvc.analytics.collect") @mock.patch("dvc.analytics.send") -def test_daemon_analytics(mock_collect, mock_send, tmp_path): - assert 0 == main(["daemon", "analytics", None, 0]) - assert mock_collect.called - assert mock_send.called +@mock.patch("os.remove") +def test_daemon_analytics(mock_remove, mock_send, tmp_path): + report = fspath(tmp_path) + assert 0 == main(["daemon", "analytics", report]) + + mock_send.assert_called_with(report) + mock_remove.assert_called_with(report) + + +@mock.patch("dvc.daemon._spawn") +@mock.patch("dvc.analytics.is_enabled", return_value=True) +@mock.patch("dvc.analytics.runtime_info", return_value={}) +def test_main_analytics(mock_is_enabled, mock_daemon, mock_report, dvc_repo): + assert 0 == main(["add", "foo"]) + assert mock_is_enabled.called + assert mock_report.called + assert mock_daemon.called diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index b749893813..69754fd608 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -20,30 +20,32 @@ def tmp_global_config(tmp_path): yield -def test_collect(tmp_global_config): +def test_runtime_info(tmp_global_config): schema = Schema( { - "cmd_class": Any(str, None), - "cmd_return_code": Any(int, None), "dvc_version": Any(builtin_str, str), "is_binary": bool, - "scm_class": Any("Git", None), + "scm_class": Any("Git", "NoSCM"), "user_id": Any(builtin_str, str), "system_info": dict, } ) - report = analytics.collect(return_code=0) - assert schema(report) + assert schema(analytics.runtime_info()) @mock.patch("requests.post") def test_send(mock_post, tmp_path): url = "https://analytics.dvc.org" report = {"name": "dummy report"} + fname = tmp_path / "report" - analytics.send(report) - mock_post.assert_called_with(url, json=report, timeout=5) + with open(fname, "w") as fobj: + json.dump(report, fobj) + + analytics.send(fname) + assert mock_post.called + assert mock_post.call_args.args[0] == url @pytest.mark.parametrize( From 7e8bdbd6035221c4ebae2058b21011f9b7ad6e7a Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Thu, 5 Dec 2019 19:37:15 -0600 Subject: [PATCH 34/46] remove pathlib / fspath changes related to the patch --- dvc/analytics.py | 21 ++++++++++++++------- dvc/config.py | 19 ++++++++++++------- dvc/lock.py | 6 +++--- dvc/utils/compat.py | 6 ------ 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 6501ece8f9..0be3ed91a0 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -1,5 +1,6 @@ import json import logging +import os import platform import requests import sys @@ -16,7 +17,7 @@ from dvc.repo import Repo from dvc.scm import SCM from dvc.utils import env2bool, is_binary, makedirs -from dvc.utils.compat import str, FileNotFoundError, convert_to_unicode +from dvc.utils.compat import str, FileNotFoundError logger = logging.getLogger(__name__) @@ -132,18 +133,24 @@ def find_or_create_user_id(): IDs are generated randomly with UUID. """ config_dir = Config.get_global_config_dir() - fname = config_dir / "user_id" - lockfile = fname.with_suffix(".lock") + fname = os.path.join(config_dir, "user_id") + lockfile = os.path.join(config_dir, "user_id.lock") + + # Since the `fname` and `lockfile` are under the global config, + # we need to make sure such directory exist already + makedirs(config_dir, exist_ok=True) try: with Lock(lockfile): try: - user_id = json.loads(fname.read_text())["user_id"] + with open(fname, "r") as fobj: + user_id = json.load(fobj)["user_id"] + except (FileNotFoundError, ValueError, AttributeError): user_id = str(uuid.uuid4()) - makedirs(fname.parent, exist_ok=True) - data = convert_to_unicode(json.dumps({"user_id": user_id})) - fname.write_text(data) + + with open(fname, "w") as fobj: + json.dump({"user_id": user_id}, fobj) return user_id diff --git a/dvc/config.py b/dvc/config.py index 32c7a0977e..62823deb6c 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -13,7 +13,7 @@ from dvc.exceptions import DvcException from dvc.exceptions import NotDvcRepoError -from dvc.utils.compat import open, str, pathlib, fspath +from dvc.utils.compat import open, str logger = logging.getLogger(__name__) @@ -284,8 +284,8 @@ def get_global_config_dir(): """ from appdirs import user_config_dir - return pathlib.Path( - user_config_dir(appname=Config.APPNAME, appauthor=Config.APPAUTHOR) + return user_config_dir( + appname=Config.APPNAME, appauthor=Config.APPAUTHOR ) @staticmethod @@ -297,8 +297,8 @@ def get_system_config_dir(): """ from appdirs import site_config_dir - return pathlib.Path( - site_config_dir(appname=Config.APPNAME, appauthor=Config.APPAUTHOR) + return site_config_dir( + appname=Config.APPNAME, appauthor=Config.APPAUTHOR ) @staticmethod @@ -342,8 +342,13 @@ def _resolve_paths(self, config): return ret def _load_configs(self): - system_config_file = fspath(self.get_system_config_dir() / self.CONFIG) - global_config_file = fspath(self.get_global_config_dir() / self.CONFIG) + system_config_file = os.path.join( + self.get_system_config_dir(), self.CONFIG + ) + + global_config_file = os.path.join( + self.get_global_config_dir(), self.CONFIG + ) self._system_config = configobj.ConfigObj(system_config_file) self._global_config = configobj.ConfigObj(global_config_file) diff --git a/dvc/lock.py b/dvc/lock.py index d4925e9638..147118c0fe 100644 --- a/dvc/lock.py +++ b/dvc/lock.py @@ -10,7 +10,7 @@ from dvc.exceptions import DvcException from dvc.utils import makedirs -from dvc.utils.compat import is_py3, fspath +from dvc.utils.compat import is_py3 DEFAULT_TIMEOUT = 5 @@ -58,7 +58,7 @@ def __init__(self, lockfile, tmp_dir=None): # [1] https://github.com/iterative/dvc/issues/2582 self._hostname = socket.gethostname() - self._lockfile = fspath(lockfile) + self._lockfile = lockfile self._lifetime = timedelta(days=365) # Lock for good by default self._separator = flufl.lock.SEP self._set_claimfile() @@ -111,7 +111,7 @@ class Lock(object): """ def __init__(self, lockfile, tmp_dir=None): - self.lockfile = fspath(lockfile) + self.lockfile = lockfile self._lock = None @property diff --git a/dvc/utils/compat.py b/dvc/utils/compat.py index ab079c1d08..372c8d2c60 100644 --- a/dvc/utils/compat.py +++ b/dvc/utils/compat.py @@ -207,12 +207,6 @@ def fspath(path): except AttributeError: if hasattr(path_type, "__fspath__"): raise - - # Support for pathlib.Path in Python 3.5, since it doesn't - # implement `__fspath__` - if isinstance(path, pathlib.Path): - return str(path) - else: raise TypeError( "expected str, bytes or os.PathLike object, " From bc2471cd30b817adaf71d0244a450ef617d12ac4 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Thu, 5 Dec 2019 19:56:45 -0600 Subject: [PATCH 35/46] tests: adjust scm_class schema --- tests/unit/test_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 69754fd608..11678617aa 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -25,7 +25,7 @@ def test_runtime_info(tmp_global_config): { "dvc_version": Any(builtin_str, str), "is_binary": bool, - "scm_class": Any("Git", "NoSCM"), + "scm_class": Any("Git", None), "user_id": Any(builtin_str, str), "system_info": dict, } From c82d9dcbc104592c6c2525fb401cfce76aa5b450 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Thu, 5 Dec 2019 23:18:44 -0600 Subject: [PATCH 36/46] compat: bring back unicode literals --- dvc/analytics.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dvc/analytics.py b/dvc/analytics.py index 0be3ed91a0..f0bc086d7b 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import json import logging import os From f704e5468bf949e8b6789e2b047ef04fc91f4c1f Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Fri, 6 Dec 2019 10:20:30 -0600 Subject: [PATCH 37/46] tests: stringify tmp_global_config since it doesnt return a pathlike object --- tests/unit/test_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 11678617aa..46a40ae890 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -15,7 +15,7 @@ def tmp_global_config(tmp_path): Fixture to prevent modifying the actual global config """ with mock.patch( - "dvc.config.Config.get_global_config_dir", return_value=tmp_path + "dvc.config.Config.get_global_config_dir", return_value=str(tmp_path) ): yield From da064e2ede7cc041ac16842b6993d88b18743da0 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Fri, 6 Dec 2019 12:28:06 -0600 Subject: [PATCH 38/46] analytics: remove the report after sending it --- dvc/analytics.py | 9 +++++++++ dvc/command/daemon.py | 8 ++------ tests/func/test_analytics.py | 4 +--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index f0bc086d7b..624b6c0c2a 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -81,12 +81,21 @@ def is_enabled(): def send(report): + """ + Side effect: Removes the report after sending it. + + The report is generated and stored in a temporary file, see: + `collect_and_send_report`. Sending happens on another process, + thus, the need of removing such file afterwards. + """ url = "https://analytics.dvc.org" headers = {"content-type": "application/json"} with open(report, "rb") as fobj: requests.post(url, data=fobj, headers=headers, timeout=5) + os.remove(report) + def scm_in_use(): try: diff --git a/dvc/command/daemon.py b/dvc/command/daemon.py index 68d4b36f6d..da92105847 100644 --- a/dvc/command/daemon.py +++ b/dvc/command/daemon.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import os - from dvc.command.base import CmdBaseNoRepo from dvc.command.base import fix_subparsers @@ -12,6 +10,7 @@ class CmdDaemonBase(CmdBaseNoRepo): class CmdDaemonUpdater(CmdDaemonBase): def run(self): + import os from dvc.repo import Repo from dvc.updater import Updater @@ -27,10 +26,7 @@ class CmdDaemonAnalytics(CmdDaemonBase): def run(self): from dvc import analytics - report = self.args.target - - analytics.send(report) - os.remove(report) + analytics.send(self.args.target) return 0 diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py index df7454965b..863b0e329c 100644 --- a/tests/func/test_analytics.py +++ b/tests/func/test_analytics.py @@ -5,13 +5,11 @@ @mock.patch("dvc.analytics.send") -@mock.patch("os.remove") -def test_daemon_analytics(mock_remove, mock_send, tmp_path): +def test_daemon_analytics(mock_send, tmp_path): report = fspath(tmp_path) assert 0 == main(["daemon", "analytics", report]) mock_send.assert_called_with(report) - mock_remove.assert_called_with(report) @mock.patch("dvc.daemon._spawn") From 5a8033415da073dbccd5fb1b15e2bf3603208881 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Fri, 6 Dec 2019 13:54:17 -0600 Subject: [PATCH 39/46] tests: use str, builtin_str in schema --- dvc/analytics.py | 2 -- tests/unit/test_analytics.py | 19 +++++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 624b6c0c2a..088b24eb89 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json import logging import os diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 46a40ae890..60e7a565b4 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -9,6 +9,9 @@ from dvc.utils.compat import str, builtin_str +string = Any(str, builtin_str) + + @pytest.fixture def tmp_global_config(tmp_path): """ @@ -23,10 +26,10 @@ def tmp_global_config(tmp_path): def test_runtime_info(tmp_global_config): schema = Schema( { - "dvc_version": Any(builtin_str, str), + "dvc_version": string, "is_binary": bool, "scm_class": Any("Git", None), - "user_id": Any(builtin_str, str), + "user_id": string, "system_info": dict, } ) @@ -38,7 +41,7 @@ def test_runtime_info(tmp_global_config): def test_send(mock_post, tmp_path): url = "https://analytics.dvc.org" report = {"name": "dummy report"} - fname = tmp_path / "report" + fname = str(tmp_path / "report") with open(fname, "w") as fobj: json.dump(report, fobj) @@ -80,19 +83,19 @@ def test_system_info(): "windows_version_build": int, "windows_version_major": int, "windows_version_minor": int, - "windows_version_service_pack": str, + "windows_version_service_pack": string, } ) if system == "Darwin": - schema = schema.extend({"mac_version": str}) + schema = schema.extend({"mac_version": string}) if system == "Linux": schema = schema.extend( { - "linux_distro": str, - "linux_distro_like": Any(str, None), - "linux_distro_version": Any(str, None), + "linux_distro": string, + "linux_distro_like": string, + "linux_distro_version": string, } ) From 2145daa130c6c5323f001863d92e8997a2d6c429 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Fri, 6 Dec 2019 16:23:58 -0600 Subject: [PATCH 40/46] analytics: define private methods --- dvc/analytics.py | 34 +++++++++++++++++----------------- tests/func/test_analytics.py | 2 +- tests/unit/test_analytics.py | 8 ++++---- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 088b24eb89..2826a95f74 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -35,7 +35,7 @@ def collect_and_send_report(args=None, return_code=None): report as a JSON, where the _collector_ generates it and the _sender_ removes it after sending it. """ - report = runtime_info() + report = _runtime_info() # Include command execution information on the report report.update( @@ -50,19 +50,6 @@ def collect_and_send_report(args=None, return_code=None): daemon(["analytics", fobj.name]) -def runtime_info(): - """ - Gather information from the environment where DVC runs to fill a report. - """ - return { - "dvc_version": __version__, - "is_binary": is_binary(), - "scm_class": scm_in_use(), - "system_info": system_info(), - "user_id": find_or_create_user_id(), - } - - def is_enabled(): if env2bool("DVC_TEST"): return False @@ -95,7 +82,7 @@ def send(report): os.remove(report) -def scm_in_use(): +def _scm_in_use(): try: scm = SCM(root_dir=Repo.find_root()) return type(scm).__name__ @@ -103,7 +90,20 @@ def scm_in_use(): pass -def system_info(): +def _runtime_info(): + """ + Gather information from the environment where DVC runs to fill a report. + """ + return { + "dvc_version": __version__, + "is_binary": is_binary(), + "scm_class": _scm_in_use(), + "system_info": _system_info(), + "user_id": _find_or_create_user_id(), + } + + +def _system_info(): system = platform.system() if system == "Windows": @@ -131,7 +131,7 @@ def system_info(): return {"os": system.lower()} -def find_or_create_user_id(): +def _find_or_create_user_id(): """ The user's ID is stored on a file under the global config directory. diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py index 863b0e329c..f7cc0bfed3 100644 --- a/tests/func/test_analytics.py +++ b/tests/func/test_analytics.py @@ -14,7 +14,7 @@ def test_daemon_analytics(mock_send, tmp_path): @mock.patch("dvc.daemon._spawn") @mock.patch("dvc.analytics.is_enabled", return_value=True) -@mock.patch("dvc.analytics.runtime_info", return_value={}) +@mock.patch("dvc.analytics._runtime_info", return_value={}) def test_main_analytics(mock_is_enabled, mock_daemon, mock_report, dvc_repo): assert 0 == main(["add", "foo"]) assert mock_is_enabled.called diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 60e7a565b4..7d4138564a 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -34,7 +34,7 @@ def test_runtime_info(tmp_global_config): } ) - assert schema(analytics.runtime_info()) + assert schema(analytics._runtime_info()) @mock.patch("requests.post") @@ -99,11 +99,11 @@ def test_system_info(): } ) - assert schema(analytics.system_info()) + assert schema(analytics._system_info()) def test_find_or_create_user_id(tmp_global_config): - created = analytics.find_or_create_user_id() - found = analytics.find_or_create_user_id() + created = analytics._find_or_create_user_id() + found = analytics._find_or_create_user_id() assert created == found From c50bd17bb48fcf07b76fa32ac7bda709e15f36d5 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Sat, 7 Dec 2019 21:13:06 -0600 Subject: [PATCH 41/46] analytics: collect execution info only when available --- dvc/analytics.py | 13 ++++++------- tests/func/test_analytics.py | 6 ++---- tests/unit/test_analytics.py | 25 +++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 2826a95f74..39e6234adc 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -37,13 +37,12 @@ def collect_and_send_report(args=None, return_code=None): """ report = _runtime_info() - # Include command execution information on the report - report.update( - { - "cmd_class": args.func.__name__ if hasattr(args, "func") else None, - "cmd_return_code": return_code, - } - ) + # Include command execution information on the report only when available. + if args and hasattr(args, "func"): + report.update({"cmd_class": args.func.__name__}) + + if return_code is not None: + report.update({"cmd_return_code": return_code}) with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: json.dump(report, fobj) diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py index f7cc0bfed3..6004c64127 100644 --- a/tests/func/test_analytics.py +++ b/tests/func/test_analytics.py @@ -12,11 +12,9 @@ def test_daemon_analytics(mock_send, tmp_path): mock_send.assert_called_with(report) -@mock.patch("dvc.daemon._spawn") +@mock.patch("dvc.analytics.collect_and_send_report") @mock.patch("dvc.analytics.is_enabled", return_value=True) -@mock.patch("dvc.analytics._runtime_info", return_value={}) -def test_main_analytics(mock_is_enabled, mock_daemon, mock_report, dvc_repo): +def test_main_analytics(mock_is_enabled, mock_report, dvc_repo): assert 0 == main(["add", "foo"]) assert mock_is_enabled.called assert mock_report.called - assert mock_daemon.called diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index 7d4138564a..ece2501a9a 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -6,6 +6,7 @@ from voluptuous import Schema, Any from dvc import analytics +from dvc.cli import parse_args from dvc.utils.compat import str, builtin_str @@ -23,6 +24,30 @@ def tmp_global_config(tmp_path): yield +@mock.patch("dvc.daemon._spawn") +@mock.patch("json.dump") +def test_collect_and_send_report(mock_json, mock_daemon, tmp_global_config): + analytics.collect_and_send_report() + report = mock_json.call_args[0][0] + + with pytest.raises(KeyError): + report["cmd_class"] + + with pytest.raises(KeyError): + report["cmd_return_code"] + + args = parse_args(['add', 'foo']) + return_code = 0 + + analytics.collect_and_send_report(args, return_code) + report = mock_json.call_args[0][0] + + assert report["cmd_class"] == "CmdAdd" + assert report["cmd_return_code"] == return_code + + assert mock_daemon.call_count == 2 + + def test_runtime_info(tmp_global_config): schema = Schema( { From 146e75be3ee0c49d24c94e171dcdac625c2d4979 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Sat, 7 Dec 2019 21:15:11 -0600 Subject: [PATCH 42/46] analytics: raise error when collecting a not supported os --- dvc/analytics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 39e6234adc..26243d6098 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -127,7 +127,8 @@ def _system_info(): "linux_distro_version": distro.version(), } - return {"os": system.lower()} + # We don't collect data for any other system. + raise NotImplementedError def _find_or_create_user_id(): From c9f958dd001f145d09a48ac8b31568d9b0e1f261 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Sat, 7 Dec 2019 21:15:38 -0600 Subject: [PATCH 43/46] analytics: AttributeError -> KeyError --- dvc/analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 26243d6098..2a9e863523 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -155,7 +155,7 @@ def _find_or_create_user_id(): with open(fname, "r") as fobj: user_id = json.load(fobj)["user_id"] - except (FileNotFoundError, ValueError, AttributeError): + except (FileNotFoundError, ValueError, KeyError): user_id = str(uuid.uuid4()) with open(fname, "w") as fobj: From 3244f10e7ff8922640993ce03cedb721bd53b30b Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Sat, 7 Dec 2019 21:15:47 -0600 Subject: [PATCH 44/46] :nail_care: add dot to the end of the comment --- dvc/analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/analytics.py b/dvc/analytics.py index 2a9e863523..1ae02f60ac 100644 --- a/dvc/analytics.py +++ b/dvc/analytics.py @@ -146,7 +146,7 @@ def _find_or_create_user_id(): lockfile = os.path.join(config_dir, "user_id.lock") # Since the `fname` and `lockfile` are under the global config, - # we need to make sure such directory exist already + # we need to make sure such directory exist already. makedirs(config_dir, exist_ok=True) try: From 6e4c3d6acd1f6ee0891c25543188455c4a12c6f6 Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Sat, 7 Dec 2019 21:18:28 -0600 Subject: [PATCH 45/46] tests: require keys on analytics report schema --- tests/unit/test_analytics.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index ece2501a9a..a7163c48a9 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -56,7 +56,8 @@ def test_runtime_info(tmp_global_config): "scm_class": Any("Git", None), "user_id": string, "system_info": dict, - } + }, + required=True, ) assert schema(analytics._runtime_info()) @@ -98,7 +99,7 @@ def test_is_enabled(dvc_repo, config, result, monkeypatch, tmp_global_config): def test_system_info(): - schema = Schema({"os": Any("windows", "mac", "linux")}) + schema = Schema({"os": Any("windows", "mac", "linux")}, required=True) system = platform.system() From 5dd63008783f3a6fe3dbaa9244b6b5acb8aa656d Mon Sep 17 00:00:00 2001 From: "Mr. Outis" Date: Sat, 7 Dec 2019 21:19:54 -0600 Subject: [PATCH 46/46] :nail_care: black --- tests/func/test_analytics.py | 2 +- tests/unit/test_analytics.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/func/test_analytics.py b/tests/func/test_analytics.py index 6004c64127..a2eccfd62f 100644 --- a/tests/func/test_analytics.py +++ b/tests/func/test_analytics.py @@ -14,7 +14,7 @@ def test_daemon_analytics(mock_send, tmp_path): @mock.patch("dvc.analytics.collect_and_send_report") @mock.patch("dvc.analytics.is_enabled", return_value=True) -def test_main_analytics(mock_is_enabled, mock_report, dvc_repo): +def test_main_analytics(mock_is_enabled, mock_report, dvc_repo): assert 0 == main(["add", "foo"]) assert mock_is_enabled.called assert mock_report.called diff --git a/tests/unit/test_analytics.py b/tests/unit/test_analytics.py index a7163c48a9..43eb37dcbc 100644 --- a/tests/unit/test_analytics.py +++ b/tests/unit/test_analytics.py @@ -36,7 +36,7 @@ def test_collect_and_send_report(mock_json, mock_daemon, tmp_global_config): with pytest.raises(KeyError): report["cmd_return_code"] - args = parse_args(['add', 'foo']) + args = parse_args(["add", "foo"]) return_code = 0 analytics.collect_and_send_report(args, return_code)