-
Notifications
You must be signed in to change notification settings - Fork 1.3k
analytics: refactor into a module #2826
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ef1f038
b618d03
7d3cbd7
64d102c
2e7a6d3
5cb437b
610d1ee
d958870
34d3004
5261ed7
467b110
48b4cba
6bc2403
f995204
2a5cac5
e9d56a9
787ea9c
e69fffa
656f986
946dfc1
5596015
2c8e149
ca485db
e0d47a1
8b5ca25
8a32b28
624cbb2
285620d
02eaa9a
ede0103
7a3aae1
05a6868
71ead3f
7e8bdbd
bc2471c
c82d9dc
f704e54
da064e2
5a80334
2145daa
c50bd17
146e75b
c9f958d
3244f10
6e4c3d6
5dd6300
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,256 +1,167 @@ | ||
| """Collect and send usage analytics""" | ||
| from __future__ import unicode_literals | ||
|
|
||
| import errno | ||
| import json | ||
| import logging | ||
| import os | ||
| import platform | ||
| import requests | ||
| import sys | ||
| import tempfile | ||
| import uuid | ||
|
|
||
| import distro | ||
|
|
||
| from dvc import __version__ | ||
| from dvc.utils import env2bool | ||
| from dvc.utils.compat import str | ||
| from dvc.config import Config, to_bool | ||
| from dvc.daemon import daemon | ||
| from dvc.exceptions import NotDvcRepoError | ||
| from dvc.lock import Lock, LockError | ||
| from dvc.repo import Repo | ||
| from dvc.scm import SCM | ||
| from dvc.utils import env2bool, is_binary, makedirs | ||
efiop marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| from dvc.utils.compat import str, FileNotFoundError | ||
|
|
||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| class Analytics(object): | ||
| """Class for collecting and sending usage analytics. | ||
|
|
||
| Args: | ||
| info (dict): optional existing analytics report. | ||
| def collect_and_send_report(args=None, return_code=None): | ||
| """ | ||
| Collect information from the runtime/environment and the command | ||
| being executed into a report and send it over the network. | ||
|
|
||
| URL = "https://analytics.dvc.org" | ||
| TIMEOUT_POST = 5 | ||
| To prevent analytics from blocking the execution of the main thread, | ||
| sending the report is done in a separate process. | ||
|
|
||
| USER_ID_FILE = "user_id" | ||
| The inter-process communication happens through a file containing the | ||
| report as a JSON, where the _collector_ generates it and the _sender_ | ||
| removes it after sending it. | ||
| """ | ||
| report = _runtime_info() | ||
|
|
||
| PARAM_DVC_VERSION = "dvc_version" | ||
| PARAM_USER_ID = "user_id" | ||
| PARAM_SYSTEM_INFO = "system_info" | ||
| # Include command execution information on the report only when available. | ||
| if args and hasattr(args, "func"): | ||
| report.update({"cmd_class": args.func.__name__}) | ||
|
|
||
| PARAM_OS = "os" | ||
| if return_code is not None: | ||
| report.update({"cmd_return_code": return_code}) | ||
|
|
||
| PARAM_WINDOWS_VERSION_MAJOR = "windows_version_major" | ||
| PARAM_WINDOWS_VERSION_MINOR = "windows_version_minor" | ||
| PARAM_WINDOWS_VERSION_BUILD = "windows_version_build" | ||
| PARAM_WINDOWS_VERSION_SERVICE_PACK = "windows_version_service_pack" | ||
| with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: | ||
| json.dump(report, fobj) | ||
| daemon(["analytics", fobj.name]) | ||
|
|
||
| PARAM_MAC_VERSION = "mac_version" | ||
|
|
||
| PARAM_LINUX_DISTRO = "linux_distro" | ||
| PARAM_LINUX_DISTRO_VERSION = "linux_distro_version" | ||
| PARAM_LINUX_DISTRO_LIKE = "linux_distro_like" | ||
| def is_enabled(): | ||
| if env2bool("DVC_TEST"): | ||
| return False | ||
|
|
||
| PARAM_SCM_CLASS = "scm_class" | ||
| PARAM_IS_BINARY = "is_binary" | ||
| PARAM_CMD_CLASS = "cmd_class" | ||
| PARAM_CMD_RETURN_CODE = "cmd_return_code" | ||
| enabled = to_bool( | ||
| Config(validate=False) | ||
| .config.get(Config.SECTION_CORE, {}) | ||
| .get(Config.SECTION_CORE_ANALYTICS, "true") | ||
| ) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It looks ridiculous that we need to make this so hard. Why can't we just write: `enabled = Config().config['core']['analytics']`?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. IIRC, in the current form we can only rely on
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't get what's wrong with my approach. It uses a validator, which will create |
||
|
|
||
| def __init__(self, info=None): | ||
| from dvc.config import Config | ||
| from dvc.lock import Lock | ||
| logger.debug("Analytics is {}abled.".format("en" if enabled else "dis")) | ||
|
|
||
| if info is None: | ||
| info = {} | ||
| return enabled | ||
|
|
||
| self.info = info | ||
|
|
||
| cdir = Config.get_global_config_dir() | ||
| try: | ||
| os.makedirs(cdir) | ||
| except OSError as exc: | ||
| if exc.errno != errno.EEXIST: | ||
| raise | ||
| def send(report): | ||
| """ | ||
| Side effect: Removes the report after sending it. | ||
|
|
||
| self.user_id_file = os.path.join(cdir, self.USER_ID_FILE) | ||
| self.user_id_file_lock = Lock(self.user_id_file + ".lock") | ||
| The report is generated and stored in a temporary file, see: | ||
| `collect_and_send_report`. Sending happens on another process, | ||
| thus, the need of removing such file afterwards. | ||
| """ | ||
| url = "https://analytics.dvc.org" | ||
| headers = {"content-type": "application/json"} | ||
|
|
||
| @staticmethod | ||
| def load(path): | ||
| """Loads analytics report from json file specified by path. | ||
| with open(report, "rb") as fobj: | ||
| requests.post(url, data=fobj, headers=headers, timeout=5) | ||
|
|
||
| Args: | ||
| path (str): path to json file with analytics report. | ||
| """ | ||
| with open(path, "r") as fobj: | ||
| analytics = Analytics(info=json.load(fobj)) | ||
| os.unlink(path) | ||
| return analytics | ||
| os.remove(report) | ||
|
|
||
| def _write_user_id(self): | ||
| import uuid | ||
|
|
||
| with open(self.user_id_file, "w+") as fobj: | ||
| user_id = str(uuid.uuid4()) | ||
| info = {self.PARAM_USER_ID: user_id} | ||
| json.dump(info, fobj) | ||
| return user_id | ||
| def _scm_in_use(): | ||
| try: | ||
| scm = SCM(root_dir=Repo.find_root()) | ||
| return type(scm).__name__ | ||
| except NotDvcRepoError: | ||
| pass | ||
|
|
||
| def _read_user_id(self): | ||
| if not os.path.exists(self.user_id_file): | ||
| return None | ||
|
|
||
| with open(self.user_id_file, "r") as fobj: | ||
| try: | ||
| info = json.load(fobj) | ||
| except ValueError as exc: | ||
| logger.debug("Failed to load user_id: {}".format(exc)) | ||
| return None | ||
|
|
||
| return info[self.PARAM_USER_ID] | ||
|
|
||
| def _get_user_id(self): | ||
| from dvc.lock import LockError | ||
| def _runtime_info(): | ||
| """ | ||
| Gather information from the environment where DVC runs to fill a report. | ||
| """ | ||
| return { | ||
| "dvc_version": __version__, | ||
| "is_binary": is_binary(), | ||
| "scm_class": _scm_in_use(), | ||
| "system_info": _system_info(), | ||
| "user_id": _find_or_create_user_id(), | ||
| } | ||
|
|
||
| try: | ||
| with self.user_id_file_lock: | ||
| user_id = self._read_user_id() | ||
| if user_id is None: | ||
| user_id = self._write_user_id() | ||
| return user_id | ||
| except LockError: | ||
| msg = "Failed to acquire '{}'" | ||
| logger.debug(msg.format(self.user_id_file_lock.lockfile)) | ||
|
|
||
| def _collect_windows(self): | ||
| import sys | ||
|
|
||
| version = sys.getwindowsversion() # pylint: disable=no-member | ||
| info = {} | ||
| info[self.PARAM_OS] = "windows" | ||
| info[self.PARAM_WINDOWS_VERSION_MAJOR] = version.major | ||
| info[self.PARAM_WINDOWS_VERSION_MINOR] = version.minor | ||
| info[self.PARAM_WINDOWS_VERSION_BUILD] = version.build | ||
| info[self.PARAM_WINDOWS_VERSION_SERVICE_PACK] = version.service_pack | ||
| return info | ||
|
|
||
| def _collect_darwin(self): | ||
| import platform | ||
|
|
||
| info = {} | ||
| info[self.PARAM_OS] = "mac" | ||
| info[self.PARAM_MAC_VERSION] = platform.mac_ver()[0] | ||
| return info | ||
|
|
||
| def _collect_linux(self): | ||
| import distro | ||
|
|
||
| info = {} | ||
| info[self.PARAM_OS] = "linux" | ||
| info[self.PARAM_LINUX_DISTRO] = distro.id() | ||
| info[self.PARAM_LINUX_DISTRO_VERSION] = distro.version() | ||
| info[self.PARAM_LINUX_DISTRO_LIKE] = distro.like() | ||
| return info | ||
|
|
||
| def _collect_system_info(self): | ||
| import platform | ||
|
|
||
| system = platform.system() | ||
| def _system_info(): | ||
| system = platform.system() | ||
|
|
||
| if system == "Windows": | ||
| return self._collect_windows() | ||
| if system == "Windows": | ||
| version = sys.getwindowsversion() | ||
|
|
||
| if system == "Darwin": | ||
| return self._collect_darwin() | ||
|
|
||
| if system == "Linux": | ||
| return self._collect_linux() | ||
|
|
||
| raise NotImplementedError | ||
|
|
||
| def collect(self): | ||
| """Collect analytics report.""" | ||
| from dvc.scm import SCM | ||
| from dvc.utils import is_binary | ||
| from dvc.repo import Repo | ||
| from dvc.exceptions import NotDvcRepoError | ||
|
|
||
| self.info[self.PARAM_DVC_VERSION] = __version__ | ||
| self.info[self.PARAM_IS_BINARY] = is_binary() | ||
| self.info[self.PARAM_USER_ID] = self._get_user_id() | ||
|
|
||
| self.info[self.PARAM_SYSTEM_INFO] = self._collect_system_info() | ||
|
|
||
| try: | ||
| scm = SCM(root_dir=Repo.find_root()) | ||
| self.info[self.PARAM_SCM_CLASS] = type(scm).__name__ | ||
| except NotDvcRepoError: | ||
| pass | ||
|
|
||
| def collect_cmd(self, args, ret): | ||
| """Collect analytics info from a CLI command.""" | ||
| from dvc.command.daemon import CmdDaemonAnalytics | ||
|
|
||
| assert isinstance(ret, int) or ret is None | ||
|
|
||
| if ret is not None: | ||
| self.info[self.PARAM_CMD_RETURN_CODE] = ret | ||
|
|
||
| if args is not None and hasattr(args, "func"): | ||
| assert args.func != CmdDaemonAnalytics | ||
| self.info[self.PARAM_CMD_CLASS] = args.func.__name__ | ||
|
|
||
| def dump(self): | ||
| """Save analytics report to a temporary file. | ||
|
|
||
| Returns: | ||
| str: path to the temporary file that contains the analytics report. | ||
| """ | ||
| import tempfile | ||
| return { | ||
| "os": "windows", | ||
| "windows_version_build": version.build, | ||
| "windows_version_major": version.major, | ||
| "windows_version_minor": version.minor, | ||
| "windows_version_service_pack": version.service_pack, | ||
| } | ||
|
|
||
| with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: | ||
| json.dump(self.info, fobj) | ||
| return fobj.name | ||
| if system == "Darwin": | ||
| return {"os": "mac", "mac_version": platform.mac_ver()[0]} | ||
|
|
||
| @staticmethod | ||
| def is_enabled(cmd=None): | ||
| from dvc.config import Config, to_bool | ||
| from dvc.command.daemon import CmdDaemonBase | ||
| if system == "Linux": | ||
| return { | ||
| "os": "linux", | ||
| "linux_distro": distro.id(), | ||
| "linux_distro_like": distro.like(), | ||
| "linux_distro_version": distro.version(), | ||
| } | ||
|
|
||
| if env2bool("DVC_TEST"): | ||
| return False | ||
| # We don't collect data for any other system. | ||
| raise NotImplementedError | ||
|
|
||
| if isinstance(cmd, CmdDaemonBase): | ||
| return False | ||
|
|
||
| core = Config(validate=False).config.get(Config.SECTION_CORE, {}) | ||
| enabled = to_bool(core.get(Config.SECTION_CORE_ANALYTICS, "true")) | ||
| logger.debug( | ||
| "Analytics is {}.".format("enabled" if enabled else "disabled") | ||
| ) | ||
| return enabled | ||
| def _find_or_create_user_id(): | ||
| """ | ||
| The user's ID is stored on a file under the global config directory. | ||
|
|
||
| @staticmethod | ||
| def send_cmd(cmd, args, ret): | ||
| """Collect and send analytics for CLI command. | ||
| The file should contain a JSON with a "user_id" key: | ||
|
|
||
| Args: | ||
| args (list): parsed args for the CLI command. | ||
| ret (int): return value of the CLI command. | ||
| """ | ||
| from dvc.daemon import daemon | ||
| {"user_id": "16fd2706-8baf-433b-82eb-8c7fada847da"} | ||
|
|
||
| if not Analytics.is_enabled(cmd): | ||
| return | ||
| IDs are generated randomly with UUID. | ||
| """ | ||
| config_dir = Config.get_global_config_dir() | ||
| fname = os.path.join(config_dir, "user_id") | ||
| lockfile = os.path.join(config_dir, "user_id.lock") | ||
|
|
||
| analytics = Analytics() | ||
| analytics.collect_cmd(args, ret) | ||
| daemon(["analytics", analytics.dump()]) | ||
| # Since the `fname` and `lockfile` are under the global config, | ||
| # we need to make sure such directory exist already. | ||
| makedirs(config_dir, exist_ok=True) | ||
|
|
||
| def send(self): | ||
| """Collect and send analytics.""" | ||
| import requests | ||
| try: | ||
| with Lock(lockfile): | ||
| try: | ||
| with open(fname, "r") as fobj: | ||
| user_id = json.load(fobj)["user_id"] | ||
|
|
||
| if not self.is_enabled(): | ||
| return | ||
| except (FileNotFoundError, ValueError, KeyError): | ||
| user_id = str(uuid.uuid4()) | ||
|
|
||
| self.collect() | ||
| with open(fname, "w") as fobj: | ||
| json.dump({"user_id": user_id}, fobj) | ||
|
|
||
| logger.debug("Sending analytics: {}".format(self.info)) | ||
| return user_id | ||
|
|
||
| try: | ||
| requests.post(self.URL, json=self.info, timeout=self.TIMEOUT_POST) | ||
| except requests.exceptions.RequestException as exc: | ||
| logger.debug("Failed to send analytics: {}".format(str(exc))) | ||
| except LockError: | ||
| logger.debug("Failed to acquire {lockfile}".format(lockfile=lockfile)) | ||
Uh oh!
There was an error while loading. Please reload this page.