From a5bcdc9d52ba77dcb3f710a9eb969059faf5bea5 Mon Sep 17 00:00:00 2001 From: idomic Date: Mon, 17 Jan 2022 15:00:30 -0500 Subject: [PATCH] Stats fixes/updates linting linting Fixing tests Changing to argv from args lint test_parse_dag test_parse_dag Review fixes Added field for missing UIDs Additional function to get home dir --- setup.py | 1 + src/ploomber/cli/build.py | 13 +- src/ploomber/cli/cli.py | 60 ++++++- src/ploomber/cli/examples.py | 17 ++ src/ploomber/cli/install.py | 79 ++++++--- src/ploomber/cli/interact.py | 14 +- src/ploomber/cli/nb.py | 28 +++- src/ploomber/cli/parsers.py | 2 - src/ploomber/cli/plot.py | 6 +- src/ploomber/cli/report.py | 6 +- src/ploomber/cli/status.py | 3 +- src/ploomber/cli/task.py | 6 +- src/ploomber/telemetry/telemetry.py | 239 +++++++++++++++++++++------- tests/telemetry/test_telemetry.py | 106 ++++++++++++ 14 files changed, 479 insertions(+), 101 deletions(-) diff --git a/setup.py b/setup.py index 2ff3a3bdb..4e1da3a30 100644 --- a/setup.py +++ b/setup.py @@ -153,6 +153,7 @@ def read(name): 'humanize', 'tqdm', 'posthog', + 'distro', 'importlib_resources;python_version<"3.7"', # for code normalization, parso is also needed for inferring upstream # dependencies in jupyter notebooks diff --git a/src/ploomber/cli/build.py b/src/ploomber/cli/build.py index 181271a1d..47822e965 100644 --- a/src/ploomber/cli/build.py +++ b/src/ploomber/cli/build.py @@ -40,7 +40,14 @@ def main(render_only=False): # users may try to run "ploomber build {name}" to build a single task if len(sys.argv) > 1 and not sys.argv[1].startswith('-'): suggestion = 'ploomber task {task-name}' - parser.error(f'{parser.prog!r} does not take positional arguments.\n' + cmd_name = parser.prog + telemetry.log_api("unsupported_build_cmd", + metadata={ + 'cmd_name': cmd_name, + 'suggestion': suggestion, + 'argv': sys.argv + }) + parser.error(f'{cmd_name!r} does not take positional arguments.\n' f'To build a single task, try: {suggestion!r}') dag, args = parser.load_from_entry_point_arg() @@ -65,5 +72,7 @@ def main(render_only=False): print(report) end_time = datetime.datetime.now() telemetry.log_api("ploomber_build", - total_runtime=str(end_time - start_time)) + total_runtime=str(end_time - start_time), + dag=dag, + metadata={'argv': sys.argv}) return dag diff --git a/src/ploomber/cli/cli.py b/src/ploomber/cli/cli.py index b01c02e1d..bb35f9d69 100644 --- a/src/ploomber/cli/cli.py +++ b/src/ploomber/cli/cli.py @@ -48,13 +48,34 @@ def scaffold(conda, package, entry_point, empty): template = '-e/--entry-point is not compatible with the {flag} flag' if entry_point and conda: - raise click.ClickException(template.format(flag='--conda')) + err = template.format(flag='--conda') + telemetry.log_api("scaffold_error", + metadata={ + 'type': 'entry_and_conda_flag', + 'exception': err, + 'argv': sys.argv + }) + raise click.ClickException(err) if entry_point and package: - raise click.ClickException(template.format(flag='--package')) + err = template.format(flag='--package') + telemetry.log_api("scaffold_error", + metadata={ + 'type': 'entry_and_package_flag', + 'exception': err, + 'argv': sys.argv + }) + raise click.ClickException(err) if entry_point and empty: - raise click.ClickException(template.format(flag='--empty')) + err = template.format(flag='--empty') + telemetry.log_api("scaffold_error", + metadata={ + 'type': 'entry_and_empty_flag', + 'exception': err, + 'argv': sys.argv + }) + raise click.ClickException(err) # try to load a dag by looking in default places if entry_point is None: @@ -67,14 +88,31 @@ def scaffold(conda, package, entry_point, empty): Path(entry_point), ) except Exception as e: + telemetry.log_api("scaffold_error", + metadata={ + 'type': 'dag_load_failed', + 'exception': e, + 'argv': sys.argv + }) raise click.ClickException(e) from e if loaded: # existing pipeline, add tasks spec, _, path_to_spec = loaded _scaffold.add(spec, path_to_spec) + telemetry.log_api("ploomber_scaffold", + dag=loaded, + metadata={ + 'type': 'add_task', + 'argv': sys.argv + }) else: # no pipeline, create base project + telemetry.log_api("ploomber_scaffold", + metadata={ + 'type': 'base_project', + 'argv': sys.argv + }) scaffold_project.cli(project_path=None, conda=conda, package=package, @@ -109,6 +147,12 @@ def examples(name, force, branch, output): except click.ClickException: raise except Exception as e: + telemetry.log_api("examples_error", + metadata={ + 'type': 'runtime_error', + 'exception': e, + 'argv': sys.argv + }) raise RuntimeError( 'An error happened when executing the examples command. Check out ' 'the full error message for details. Downloading the examples ' @@ -150,11 +194,19 @@ def cmd_router(): fn() elif cmd_name in alias: suggestion = alias[cmd_name] + telemetry.log_api("unsupported_build_cmd", + metadata={ + 'cmd_name': cmd_name, + 'suggestion': suggestion, + 'argv': sys.argv + }) _exit_with_error_message("Try 'ploomber --help' for help.\n\n" f"Error: {cmd_name!r} is not a valid command." f" Did you mean {suggestion!r}?") else: - telemetry.log_api("unsupported-api-call", metadata={'argv': sys.argv}) + if cmd_name not in ['examples', 'scaffold']: + telemetry.log_api("unsupported-api-call", + metadata={'argv': sys.argv}) cli() diff --git a/src/ploomber/cli/examples.py b/src/ploomber/cli/examples.py index 13919aff2..ac4897fe6 100644 --- a/src/ploomber/cli/examples.py +++ b/src/ploomber/cli/examples.py @@ -2,6 +2,7 @@ import stat import os import csv +import sys from datetime import datetime import json import shutil @@ -13,6 +14,7 @@ from ploomber.io.terminalwriter import TerminalWriter from ploomber.table import Table +from ploomber.telemetry import telemetry from pygments.formatters.terminal import TerminalFormatter from pygments.lexers.markup import MarkdownLexer @@ -250,6 +252,11 @@ def main(name, force=False, branch=None, output=None): selected = manager.path_to(name) if not selected.exists(): + telemetry.log_api("examples_error", + metadata={ + 'type': 'wrong_example_name', + 'example_name': name + }) click.echo(f'There is no example named {name!r}.\n' 'To list examples: ploomber examples\n' 'To update local copy: ploomber examples -f') @@ -259,6 +266,11 @@ def main(name, force=False, branch=None, output=None): tw.sep('=', f'Copying example {name!r} to {output}/', green=True) if Path(output).exists(): + telemetry.log_api("examples_error", + metadata={ + 'type': 'duplicated_output', + 'output_name': output + }) raise click.ClickException( f"{output!r} already exists in the current working " "directory, please rename it or move it " @@ -279,3 +291,8 @@ def main(name, force=False, branch=None, output=None): f'\n* conda env create -f environment.yml' f'\n* pip install -r requirements.txt\n') tw.sep('=', blue=True) + telemetry.log_api("ploomber_examples", + metadata={ + 'argv': sys.argv, + 'example_name': name + }) diff --git a/src/ploomber/cli/install.py b/src/ploomber/cli/install.py index a55460357..031832988 100644 --- a/src/ploomber/cli/install.py +++ b/src/ploomber/cli/install.py @@ -45,29 +45,41 @@ def main(use_lock): err = ("Expected and environment.lock.yaml " "(conda) or requirements.lock.txt (pip) in the current " "directory. Add one of them and try again.") - telemetry.log_api("install-exception-lock", - metadata={'Exception': err}) + telemetry.log_api("install-error", + metadata={ + 'type': 'no_lock', + 'exception': err + }) raise exceptions.ClickException(err) elif not use_lock and not HAS_ENV_YML and not HAS_REQS_TXT: err = ("Expected an environment.yaml (conda)" " or requirements.txt (pip) in the current directory." " Add one of them and try again.") - telemetry.log_api("install-exception-requirements", - metadata={'Exception': err}) + telemetry.log_api("install-error", + metadata={ + 'type': 'no_env_requirements', + 'exception': err + }) raise exceptions.ClickException(err) elif (not HAS_CONDA and use_lock and HAS_ENV_LOCK_YML and not HAS_REQS_LOCK_TXT): err = ("Found env environment.lock.yaml " "but conda is not installed. Install conda or add a " "requirements.lock.txt to use pip instead") - telemetry.log_api("install-exception-conda", - metadata={'Exception': err}) + telemetry.log_api("install-error", + metadata={ + 'type': 'no_conda', + 'exception': err + }) raise exceptions.ClickException(err) elif not HAS_CONDA and not use_lock and HAS_ENV_YML and not HAS_REQS_TXT: err = ("Found environment.yaml but conda is not installed." " Install conda or add a requirements.txt to use pip instead") - telemetry.log_api("install-exception-conda2", - metadata={'Exception': err}) + telemetry.log_api("install-error", + metadata={ + 'type': 'no_conda2', + 'exception': err + }) raise exceptions.ClickException(err) elif HAS_CONDA and use_lock and HAS_ENV_LOCK_YML: main_conda(start_time, use_lock=True) @@ -154,10 +166,16 @@ def main_conda(start_time, use_lock): current_env = Path(shutil.which('python')).parents[1].name if env_name == current_env: - raise RuntimeError(f'{env_yml} will create an environment ' - f'named {env_name!r}, which is the current active ' - 'environment. Move to a different one and try ' - 'again (e.g., "conda activate base")') + err = (f'{env_yml} will create an environment ' + f'named {env_name!r}, which is the current active ' + 'environment. Move to a different one and try ' + 'again (e.g., "conda activate base")') + telemetry.log_api("install-error", + metadata={ + 'type': 'env_running_conflict', + 'exception': err + }) + raise RuntimeError(err) # get current installed envs conda = shutil.which('conda') @@ -175,9 +193,15 @@ def main_conda(start_time, use_lock): ]) if already_installed: - raise ValueError(f'Environment {env_name!r} already exists, ' - f'delete it and try again ' - f'(conda env remove --name {env_name})') + err = (f'Environment {env_name!r} already exists, ' + f'delete it and try again ' + f'(conda env remove --name {env_name})') + telemetry.log_api("install-error", + metadata={ + 'type': 'duplicate_env', + 'exception': err + }) + raise ValueError(err) pkg_manager = mamba if mamba else conda cmdr.run(pkg_manager, @@ -225,11 +249,15 @@ def _find_conda_root(conda_bin): # on linux miniconda3, anaconda and miniconda if parent.name.lower() in {'miniconda3', 'miniconda', 'anaconda3'}: return parent - - raise RuntimeError( - 'Failed to locate conda root from ' - f'directory: {str(conda_bin)!r}. Please submit an issue: ' - 'https://github.com/ploomber/ploomber/issues/new') + err = ('Failed to locate conda root from ' + f'directory: {str(conda_bin)!r}. Please submit an issue: ' + 'https://github.com/ploomber/ploomber/issues/new') + telemetry.log_api("install-error", + metadata={ + 'type': 'no_conda_root', + 'exception': err + }) + raise RuntimeError(err) def _path_to_pip_in_env_with_name(conda_bin, env_name): @@ -246,9 +274,14 @@ def _locate_pip_inside_conda(env_name): # this might happen if the environment does not contain python/pip if not Path(pip).exists(): - raise FileNotFoundError( - f'Could not locate pip in environment {env_name!r}, make sure ' - 'it is included in your environment.yml and try again') + err = (f'Could not locate pip in environment {env_name!r}, make sure ' + 'it is included in your environment.yml and try again') + telemetry.log_api("install-error", + metadata={ + 'type': 'no_pip_env', + 'exception': err + }) + raise FileNotFoundError(err) return pip diff --git a/src/ploomber/cli/interact.py b/src/ploomber/cli/interact.py index 9b5c051e5..3635bfdcc 100644 --- a/src/ploomber/cli/interact.py +++ b/src/ploomber/cli/interact.py @@ -20,12 +20,20 @@ def main(): try: dag.render() except Exception: - print('Your dag failed to render, but you can still inspect the ' - 'object to debug it.\n') + err = ('Your dag failed to render, but you can still inspect the ' + 'object to debug it.\n') + telemetry.log_api("interact_error", + dag=dag, + metadata={ + 'type': 'dag_render_failed', + 'exception': err + }) + print(err) end_time = datetime.datetime.now() telemetry.log_api("ploomber_interact", - total_runtime=str(end_time - start_time)) + total_runtime=str(end_time - start_time), + dag=dag) # NOTE: do not use embed here, we must use start_ipython, see here: # https://github.com/ipython/ipython/issues/8918 diff --git a/src/ploomber/cli/nb.py b/src/ploomber/cli/nb.py index 5d0347eb3..95785070f 100644 --- a/src/ploomber/cli/nb.py +++ b/src/ploomber/cli/nb.py @@ -1,11 +1,13 @@ import shutil from pathlib import Path import stat +import sys import click from ploomber.cli.parsers import CustomParser from ploomber.cli.io import command_endpoint +from ploomber.telemetry import telemetry def _call_in_source(dag, method_name, message, kwargs=None): @@ -124,8 +126,14 @@ def main(): dag.render(show_progress=False) if loading_error: - raise RuntimeError('Could not run nb command: the DAG ' - 'failed to load') from loading_error + err = ('Could not run nb command: the DAG ' 'failed to load') + telemetry.log_api("nb_error", + metadata={ + 'type': 'dag_load_failed', + 'exception': err + f' {loading_error}', + 'argv': sys.argv + }) + raise RuntimeError(err) from loading_error if args.format: new_paths = [ @@ -170,14 +178,20 @@ def main(): 'Paired notebooks', dict(base_path=args.pair), ) - click.echo(f'Finshed pairing notebooks. Tip: add {args.pair!r} to ' + click.echo(f'Finished pairing notebooks. Tip: add {args.pair!r} to ' 'your .gitignore to keep your repository clean') if args.install_hook: if not Path('.git').is_dir(): - raise NotADirectoryError( - 'Expected a .git/ directory in the current working ' - 'directory. Run this from the repository root directory.') + err = ('Expected a .git/ directory in the current working ' + 'directory. Run this from the repository root directory.') + telemetry.log_api("nb_error", + metadata={ + 'type': 'no_git_config', + 'exception': err, + 'argv': sys.argv + }) + raise NotADirectoryError(err) parent = Path('.git', 'hooks') parent.mkdir(exist_ok=True) @@ -194,3 +208,5 @@ def main(): if args.uninstall_hook: _delete_hook(Path('.git', 'hooks', 'pre-commit')) _delete_hook(Path('.git', 'hooks', 'post-commit')) + + telemetry.log_api("ploomber_nb", dag=dag, metadata={'argv': sys.argv}) diff --git a/src/ploomber/cli/parsers.py b/src/ploomber/cli/parsers.py index 5659b5707..876926878 100644 --- a/src/ploomber/cli/parsers.py +++ b/src/ploomber/cli/parsers.py @@ -38,7 +38,6 @@ class CustomMutuallyExclusiveGroup(argparse._MutuallyExclusiveGroup): A subclass of argparse._MutuallyExclusiveGroup that determines whether the added args should go into the static of dynamic API """ - def add_argument(self, *args, **kwargs): if not self._container.finished_static_api: if (not self._container.in_context @@ -66,7 +65,6 @@ class CustomParser(argparse.ArgumentParser): manager, only after this has happened, other arguments can be added using parser.add_argument. """ - def __init__(self, *args, **kwargs): self.DEFAULT_ENTRY_POINT = default.try_to_find_entry_point() diff --git a/src/ploomber/cli/plot.py b/src/ploomber/cli/plot.py index f7d7f103b..0a718e623 100644 --- a/src/ploomber/cli/plot.py +++ b/src/ploomber/cli/plot.py @@ -1,3 +1,5 @@ +import sys + from ploomber.cli.parsers import CustomParser from ploomber.cli.io import cli_endpoint from ploomber.util.default import extract_name @@ -27,5 +29,7 @@ def main(): dag.plot(output=output) end_time = datetime.datetime.now() telemetry.log_api("ploomber_plot", - total_runtime=str(end_time - start_time)) + total_runtime=str(end_time - start_time), + dag=dag, + metadata={'argv': sys.argv}) print('Plot saved at:', output) diff --git a/src/ploomber/cli/report.py b/src/ploomber/cli/report.py index ca4c18cc7..90a72a331 100644 --- a/src/ploomber/cli/report.py +++ b/src/ploomber/cli/report.py @@ -1,3 +1,5 @@ +import sys + from ploomber.cli.parsers import CustomParser from ploomber.cli.io import cli_endpoint from ploomber.telemetry import telemetry @@ -19,5 +21,7 @@ def main(): dag.to_markup(path=args.output) end_time = datetime.datetime.now() telemetry.log_api("ploomber_report", - total_runtime=str(end_time - start_time)) + total_runtime=str(end_time - start_time), + dag=dag, + metadata={'argv': sys.argv}) print('Report saved at:', args.output) diff --git a/src/ploomber/cli/status.py b/src/ploomber/cli/status.py index 5c5e4f83f..ce94a2714 100644 --- a/src/ploomber/cli/status.py +++ b/src/ploomber/cli/status.py @@ -16,4 +16,5 @@ def main(): print(dag.status()) end_time = datetime.datetime.now() telemetry.log_api("ploomber_status", - total_runtime=str(end_time - start_time)) + total_runtime=str(end_time - start_time), + dag=dag) diff --git a/src/ploomber/cli/task.py b/src/ploomber/cli/task.py index 3104cf257..20fd622ce 100644 --- a/src/ploomber/cli/task.py +++ b/src/ploomber/cli/task.py @@ -1,3 +1,5 @@ +import sys + from ploomber.cli.parsers import CustomParser from ploomber.cli.io import cli_endpoint from ploomber.telemetry import telemetry @@ -57,4 +59,6 @@ def main(): end_time = datetime.datetime.now() telemetry.log_api("ploomber_task", - total_runtime=str(end_time - start_time)) + total_runtime=str(end_time - start_time), + dag=dag, + metadata={'argv': sys.argv}) diff --git a/src/ploomber/telemetry/telemetry.py b/src/ploomber/telemetry/telemetry.py index 00c945900..b717582eb 100644 --- a/src/ploomber/telemetry/telemetry.py +++ b/src/ploomber/telemetry/telemetry.py @@ -31,17 +31,21 @@ import datetime import http.client as httplib +import warnings + import posthog import yaml import os from pathlib import Path import sys import uuid + from ploomber.telemetry import validate_inputs from ploomber import __version__ import platform +import distro -TELEMETRY_VERSION = '0.2' +TELEMETRY_VERSION = '0.3' DEFAULT_HOME_DIR = '~/.ploomber' CONF_DIR = "stats" posthog.project_api_key = 'phc_P9SpSeypyPwxrMdFn2edOOEooQioF2axppyEeDwtMSP' @@ -69,11 +73,14 @@ def is_online(): # Will output if the code is within a container def is_docker(): - cgroup = Path('/proc/self/cgroup') - docker_env = Path('/.dockerenv') - return (docker_env.exists() or cgroup.exists() - and any('docker' in line - for line in cgroup.read_text().splitlines())) + try: + cgroup = Path('/proc/self/cgroup') + docker_env = Path('/.dockerenv') + return (docker_env.exists() or cgroup.exists() + and any('docker' in line + for line in cgroup.read_text().splitlines())) + except OSError: + return False def get_os(): @@ -87,6 +94,36 @@ def get_os(): return os +def test(): + """ + Returns: + A dict of system information. + """ + os = platform.system() + if os == "Darwin": + return {"os": "mac", "mac_version": platform.mac_ver()[0]} + + if os == "Windows": + release, version, csd, platform_type = platform.win32_ver() + return { + "os": "windows", + "windows_version_release": release, + "windows_version": version, + "windows_version_service_pack": csd, + "windows_version_os_type": platform_type, + } + + if os == "Linux": + return { + "os": "linux", + "linux_distro": distro.id(), + "linux_distro_like": distro.like(), + "linux_distro_version": distro.version(), + } + + return {"os": os} + + def is_conda(): """ The function will tell if the code is running in a conda env @@ -100,8 +137,8 @@ def get_base_prefix_compat(): """ This function will find the pip virtualenv with different python versions. Get base/real prefix, or sys.prefix if there is none.""" - return getattr(sys, "base_prefix", None) or sys.prefix or \ - getattr(sys, "real_prefix", None) + return getattr(sys, "base_prefix", None) or sys.prefix or getattr( + sys, "real_prefix", None) def in_virtualenv(): @@ -109,6 +146,7 @@ def in_virtualenv(): def get_env(): + """Returns: The name of the virtual env if exists as str""" if in_virtualenv(): return 'pip' elif is_conda(): @@ -117,20 +155,88 @@ def get_env(): return 'local' +def is_colab(): + """Returns: True for Google Colab env""" + return "COLAB_GPU" in os.environ + + +def is_paperspace(): + """Returns: True for Paperspace env""" + return "PS_API_KEY" in os.environ or\ + "PAPERSPACE_API_KEY" in os.environ or\ + "PAPERSPACE_NOTEBOOK_REPO_ID" in os.environ + + +def is_slurm(): + """Returns: True for Slurm env""" + return "SLURM_JOB_ID" in os.environ + + +def is_airflow(): + """Returns: True for Airflow env""" + return "AIRFLOW_CONFIG" in os.environ or "AIRFLOW_HOME" in os.environ + + +def is_argo(): + """Returns: True for Airflow env""" + return "ARGO_AGENT_TASK_WORKERS" in os.environ or \ + "ARGO_KUBELET_PORT" in os.environ + + +def clean_tasks_upstream_products(input): + clean_input = {} + try: + product_items = input.items() + for product_item_name, product_item in product_items: + clean_input[product_item_name] = str(product_item).split("/")[-1] + except AttributeError: # Single product + return str(input.split("/")[-1]) + + return clean_input + + +def parse_dag(dag): + try: + dag_dict = {} + dag_dict["dag_size"] = len(dag) + tasks_list = list(dag) + if tasks_list: + dag_dict["tasks"] = {} + for task in tasks_list: + task_dict = {} + task_dict["status"] = dag[task]._exec_status.name + task_dict["type"] = str(type( + dag[task])).split(".")[-1].split("'")[0] + task_dict["upstream"] = clean_tasks_upstream_products( + dag[task].upstream) + task_dict["products"] = clean_tasks_upstream_products( + dag[task].product.to_json_serializable()) + dag_dict['tasks'][task] = task_dict + + return dag_dict + except Exception: + return None + + +def get_home_dir(): + """ + Checks if ploomber home was set through the env variable. + returns the actual home_dir path. + """ + return PLOOMBER_HOME_DIR if PLOOMBER_HOME_DIR else DEFAULT_HOME_DIR + + def check_dir_exist(input_location=None): """ Checks if a specific directory exists, creates if not. In case the user didn't set a custom dir, will turn to the default home """ - if PLOOMBER_HOME_DIR: - final_location = PLOOMBER_HOME_DIR - else: - final_location = DEFAULT_HOME_DIR + home_dir = get_home_dir() if input_location: - p = Path(final_location, input_location) + p = Path(home_dir, input_location) else: - p = Path(final_location) + p = Path(home_dir) p = p.expanduser() @@ -151,15 +257,17 @@ def check_uid(): with uid_path.open("w") as file: yaml.dump({"uid": uid}, file) return uid - except FileNotFoundError as e: - return f"ERROR: Can't read UID file: {e}" + except Exception as e: + warnings.warn(f"ERROR: Can't write UID file: {e}") + return f"NO_UID {e}" else: # read and return uid try: with uid_path.open("r") as file: uid_dict = yaml.safe_load(file) return uid_dict['uid'] - except FileNotFoundError as e: - return f"Error: Can't read UID file: {e}" + except Exception as e: + warnings.warn(f"Error: Can't read UID file: {e}") + return f"NO_UID {e}" def check_stats_enabled(): @@ -179,15 +287,17 @@ def check_stats_enabled(): with config_path.open("w") as file: yaml.dump({"stats_enabled": True}, file) return True - except FileNotFoundError as e: - return f"ERROR: Can't read file: {e}" + except Exception as e: + warnings.warn(f"ERROR: Can't write to config file: {e}") + return True else: # read and return config try: with config_path.open("r") as file: conf = yaml.safe_load(file) return conf['stats_enabled'] - except FileNotFoundError as e: - return f"Error: Can't read config file {e}" + except Exception as e: + warnings.warn(f"Error: Can't read config file {e}") + return True def check_first_time_usage(): @@ -230,7 +340,11 @@ def validate_entries(event_id, uid, action, client_time, total_runtime): return event_id, uid, action, client_time, elapsed_time -def log_api(action, client_time=None, total_runtime=None, metadata=None): +def log_api(action, + client_time=None, + total_runtime=None, + dag=None, + metadata={}): """ This function logs through an API call, assigns parameters if missing like timestamp, event id and stats information. @@ -240,9 +354,34 @@ def log_api(action, client_time=None, total_runtime=None, metadata=None): client_time = datetime.datetime.now() (telemetry_enabled, uid, is_install) = _get_telemetry_info() + if 'NO_UID' in uid: + metadata['uid_issue'] = uid + uid = None + py_version = python_version() docker_container = is_docker() - operating_system = get_os() + colab = is_colab() + if colab: + metadata['colab'] = colab + + paperspace = is_paperspace() + if paperspace: + metadata['paperspace'] = paperspace + + slurm = is_slurm() + if slurm: + metadata['slurm'] = slurm + + airflow = is_airflow() + if airflow: + metadata['airflow'] = airflow + + argo = is_argo() + if argo: + metadata['argo'] = argo + if dag: + metadata['dag'] = parse_dag(dag) + os = get_os() product_version = __version__ online = is_online() environment = get_env() @@ -254,39 +393,25 @@ def log_api(action, client_time=None, total_runtime=None, metadata=None): action, client_time, total_runtime) + props = { + 'event_id': event_id, + 'user_id': uid, + 'action': action, + 'client_time': str(client_time), + 'metadata': metadata, + 'total_runtime': total_runtime, + 'python_version': py_version, + 'ploomber_version': product_version, + 'docker_container': docker_container, + 'os': os, + 'environment': environment, + 'metadata': metadata, + 'telemetry_version': TELEMETRY_VERSION + } + if is_install: posthog.capture(distinct_id=uid, event='install_success_indirect', - properties={ - 'event_id': event_id, - 'user_id': uid, - 'action': action, - 'client_time': str(client_time), - 'metadata': metadata, - 'total_runtime': total_runtime, - 'python_version': py_version, - 'ploomber_version': product_version, - 'docker_container': docker_container, - 'operating_system': operating_system, - 'environment': environment, - 'metadata': metadata, - 'telemetry_version': TELEMETRY_VERSION - }) - - posthog.capture(distinct_id=uid, - event=action, - properties={ - 'event_id': event_id, - 'user_id': uid, - 'action': action, - 'client_time': str(client_time), - 'metadata': metadata, - 'total_runtime': total_runtime, - 'python_version': py_version, - 'ploomber_version': product_version, - 'docker_container': docker_container, - 'operating_system': operating_system, - 'environment': environment, - 'metadata': metadata, - 'telemetry_version': TELEMETRY_VERSION - }) + properties=props) + + posthog.capture(distinct_id=uid, event=action, properties=props) diff --git a/tests/telemetry/test_telemetry.py b/tests/telemetry/test_telemetry.py index 7d76bdea3..eaf3c320f 100644 --- a/tests/telemetry/test_telemetry.py +++ b/tests/telemetry/test_telemetry.py @@ -8,6 +8,8 @@ from ploomber.telemetry.validate_inputs import str_param, opt_str_param from ploomber.cli import plot, install, build, interact, task, report, status import ploomber.dag.dag as dag_module +from ploomber.tasks.tasks import PythonCallable +from ploomber.products.file import File from conftest import _write_sample_conda_env, _prepare_files @@ -146,6 +148,41 @@ def mock(input_path): assert docker is True +# Ref https://stackoverflow.com/questions/53581278/test-if- +# notebook-is-running-on-google-colab +def test_colab_env(monkeypatch): + monkeypatch.setenv('COLAB_GPU', True) + colab = telemetry.is_colab() + assert colab is True + + +# Ref https://learn.paperspace.com/video/creating-a-jupyter-notebook +@pytest.mark.parametrize( + 'env_variable', + ['PS_API_KEY', 'PAPERSPACE_API_KEY', 'PAPERSPACE_NOTEBOOK_REPO_ID']) +def test_paperspace_env(monkeypatch, env_variable): + monkeypatch.setenv(env_variable, True) + pspace = telemetry.is_paperspace() + assert pspace is True + + +# Ref https://stackoverflow.com/questions/63298054/how-to-check-if-my-code +# -runs-inside-a-slurm-environment +def test_slurm_env(monkeypatch): + monkeypatch.setenv('SLURM_JOB_ID', True) + slurm = telemetry.is_slurm() + assert slurm is True + + +# Ref https://airflow.apache.org/docs/apache-airflow/stable/ +# cli-and-env-variables-ref.html?highlight=airflow_home#envvar-AIRFLOW_HOME +@pytest.mark.parametrize('env_variable', ['AIRFLOW_CONFIG', 'AIRFLOW_HOME']) +def test_airflow_env(monkeypatch, env_variable): + monkeypatch.setenv(env_variable, True) + airflow = telemetry.is_airflow() + assert airflow is True + + # Ref https://stackoverflow.com/questions/110362/how-can-i-find- # the-current-os-in-python @pytest.mark.parametrize('os_param', ['Windows', 'Linux', 'MacOS', 'Ubuntu']) @@ -333,6 +370,75 @@ def test_is_online(): assert telemetry.is_online() +def test_parse_dag_products(monkeypatch): + product = '/ml-basic/output/get.parquet' + dag = telemetry.clean_tasks_upstream_products(product) + assert dag == 'get.parquet' + + products = { + 'nb': '/spec-api-python/output/get.ipynb', + 'data': '//spec-api-python/output/data.csv' + } + dag = telemetry.clean_tasks_upstream_products(products) + assert dag == {'nb': 'get.ipynb', 'data': 'data.csv'} + + dag = telemetry.clean_tasks_upstream_products({}) + assert dag == {} + + +def test_parse_dag(monkeypatch, tmp_directory): + def fn1(product): + pass + + def fn2(upstream, product): + pass + + dag = dag_module.DAG() + t1 = PythonCallable(fn1, + File('/home/ido/filepath/file1.txt'), + dag, + name='first') + t2 = PythonCallable(fn2, + File('/home/ido/filepath/file2.txt'), + dag, + name='second') + t3 = PythonCallable(fn2, + File('/home/ido/filepath/file3.parquet'), + dag, + name='third') + t1 >> t2 + t3 >> t2 + + res = telemetry.parse_dag(dag) + assert res['dag_size'] == 3 + assert res == { + 'dag_size': 3, + 'tasks': { + 'first': { + 'status': 'WaitingRender', + 'type': 'PythonCallable', + 'upstream': {}, + 'products': 'file1.txt' + }, + 'third': { + 'status': 'WaitingRender', + 'type': 'PythonCallable', + 'upstream': {}, + 'products': 'file3.parquet' + }, + 'second': { + 'status': 'WaitingRender', + 'type': 'PythonCallable', + 'upstream': { + 'first': 'file1.txt', + 'third': 'file3.parquet' + }, + 'products': 'file2.txt' + } + } + } + + def test_is_not_online(monkeypatch): mock_httplib = Mock() mock_httplib.HTTPSConnection().request.side_effect = Exception