From a5bcdc9d52ba77dcb3f710a9eb969059faf5bea5 Mon Sep 17 00:00:00 2001
From: idomic <michael.ido@gmail.com>
Date: Mon, 17 Jan 2022 15:00:30 -0500
Subject: [PATCH] Stats fixes/updates

linting

linting

Fixing tests

Changing to argv from args

lint

test_parse_dag

test_parse_dag

Review fixes

Added field for missing UIDs

Additional function to get home dir
---
 setup.py                            |   1 +
 src/ploomber/cli/build.py           |  13 +-
 src/ploomber/cli/cli.py             |  60 ++++++-
 src/ploomber/cli/examples.py        |  17 ++
 src/ploomber/cli/install.py         |  79 ++++++---
 src/ploomber/cli/interact.py        |  14 +-
 src/ploomber/cli/nb.py              |  28 +++-
 src/ploomber/cli/parsers.py         |   2 -
 src/ploomber/cli/plot.py            |   6 +-
 src/ploomber/cli/report.py          |   6 +-
 src/ploomber/cli/status.py          |   3 +-
 src/ploomber/cli/task.py            |   6 +-
 src/ploomber/telemetry/telemetry.py | 239 +++++++++++++++++++++-------
 tests/telemetry/test_telemetry.py   | 106 ++++++++++++
 14 files changed, 479 insertions(+), 101 deletions(-)

diff --git a/setup.py b/setup.py
index 2ff3a3bdb..4e1da3a30 100644
--- a/setup.py
+++ b/setup.py
@@ -153,6 +153,7 @@ def read(name):
         'humanize',
         'tqdm',
         'posthog',
+        'distro',
         'importlib_resources;python_version<"3.7"',
         # for code normalization, parso is also needed for inferring upstream
         # dependencies in jupyter notebooks
diff --git a/src/ploomber/cli/build.py b/src/ploomber/cli/build.py
index 181271a1d..47822e965 100644
--- a/src/ploomber/cli/build.py
+++ b/src/ploomber/cli/build.py
@@ -40,7 +40,14 @@ def main(render_only=False):
     # users may try to run "ploomber build {name}" to build a single task
     if len(sys.argv) > 1 and not sys.argv[1].startswith('-'):
         suggestion = 'ploomber task {task-name}'
-        parser.error(f'{parser.prog!r} does not take positional arguments.\n'
+        cmd_name = parser.prog
+        telemetry.log_api("unsupported_build_cmd",
+                          metadata={
+                              'cmd_name': cmd_name,
+                              'suggestion': suggestion,
+                              'argv': sys.argv
+                          })
+        parser.error(f'{cmd_name!r} does not take positional arguments.\n'
                      f'To build a single task, try: {suggestion!r}')
 
     dag, args = parser.load_from_entry_point_arg()
@@ -65,5 +72,7 @@ def main(render_only=False):
             print(report)
     end_time = datetime.datetime.now()
     telemetry.log_api("ploomber_build",
-                      total_runtime=str(end_time - start_time))
+                      total_runtime=str(end_time - start_time),
+                      dag=dag,
+                      metadata={'argv': sys.argv})
     return dag
diff --git a/src/ploomber/cli/cli.py b/src/ploomber/cli/cli.py
index b01c02e1d..bb35f9d69 100644
--- a/src/ploomber/cli/cli.py
+++ b/src/ploomber/cli/cli.py
@@ -48,13 +48,34 @@ def scaffold(conda, package, entry_point, empty):
     template = '-e/--entry-point is not compatible with the {flag} flag'
 
     if entry_point and conda:
-        raise click.ClickException(template.format(flag='--conda'))
+        err = template.format(flag='--conda')
+        telemetry.log_api("scaffold_error",
+                          metadata={
+                              'type': 'entry_and_conda_flag',
+                              'exception': err,
+                              'argv': sys.argv
+                          })
+        raise click.ClickException(err)
 
     if entry_point and package:
-        raise click.ClickException(template.format(flag='--package'))
+        err = template.format(flag='--package')
+        telemetry.log_api("scaffold_error",
+                          metadata={
+                              'type': 'entry_and_package_flag',
+                              'exception': err,
+                              'argv': sys.argv
+                          })
+        raise click.ClickException(err)
 
     if entry_point and empty:
-        raise click.ClickException(template.format(flag='--empty'))
+        err = template.format(flag='--empty')
+        telemetry.log_api("scaffold_error",
+                          metadata={
+                              'type': 'entry_and_empty_flag',
+                              'exception': err,
+                              'argv': sys.argv
+                          })
+        raise click.ClickException(err)
 
     # try to load a dag by looking in default places
     if entry_point is None:
@@ -67,14 +88,31 @@ def scaffold(conda, package, entry_point, empty):
                 Path(entry_point),
             )
         except Exception as e:
+            telemetry.log_api("scaffold_error",
+                              metadata={
+                                  'type': 'dag_load_failed',
+                                  'exception': e,
+                                  'argv': sys.argv
+                              })
             raise click.ClickException(e) from e
 
     if loaded:
         # existing pipeline, add tasks
         spec, _, path_to_spec = loaded
         _scaffold.add(spec, path_to_spec)
+        telemetry.log_api("ploomber_scaffold",
+                          dag=loaded,
+                          metadata={
+                              'type': 'add_task',
+                              'argv': sys.argv
+                          })
     else:
         # no pipeline, create base project
+        telemetry.log_api("ploomber_scaffold",
+                          metadata={
+                              'type': 'base_project',
+                              'argv': sys.argv
+                          })
         scaffold_project.cli(project_path=None,
                              conda=conda,
                              package=package,
@@ -109,6 +147,12 @@ def examples(name, force, branch, output):
     except click.ClickException:
         raise
     except Exception as e:
+        telemetry.log_api("examples_error",
+                          metadata={
+                              'type': 'runtime_error',
+                              'exception': e,
+                              'argv': sys.argv
+                          })
         raise RuntimeError(
             'An error happened when executing the examples command. Check out '
             'the full error message for details. Downloading the examples '
@@ -150,11 +194,19 @@ def cmd_router():
         fn()
     elif cmd_name in alias:
         suggestion = alias[cmd_name]
+        telemetry.log_api("unsupported_build_cmd",
+                          metadata={
+                              'cmd_name': cmd_name,
+                              'suggestion': suggestion,
+                              'argv': sys.argv
+                          })
         _exit_with_error_message("Try 'ploomber --help' for help.\n\n"
                                  f"Error: {cmd_name!r} is not a valid command."
                                  f" Did you mean {suggestion!r}?")
     else:
-        telemetry.log_api("unsupported-api-call", metadata={'argv': sys.argv})
+        if cmd_name not in ['examples', 'scaffold']:
+            telemetry.log_api("unsupported-api-call",
+                              metadata={'argv': sys.argv})
         cli()
 
 
diff --git a/src/ploomber/cli/examples.py b/src/ploomber/cli/examples.py
index 13919aff2..ac4897fe6 100644
--- a/src/ploomber/cli/examples.py
+++ b/src/ploomber/cli/examples.py
@@ -2,6 +2,7 @@
 import stat
 import os
 import csv
+import sys
 from datetime import datetime
 import json
 import shutil
@@ -13,6 +14,7 @@
 
 from ploomber.io.terminalwriter import TerminalWriter
 from ploomber.table import Table
+from ploomber.telemetry import telemetry
 
 from pygments.formatters.terminal import TerminalFormatter
 from pygments.lexers.markup import MarkdownLexer
@@ -250,6 +252,11 @@ def main(name, force=False, branch=None, output=None):
         selected = manager.path_to(name)
 
         if not selected.exists():
+            telemetry.log_api("examples_error",
+                              metadata={
+                                  'type': 'wrong_example_name',
+                                  'example_name': name
+                              })
             click.echo(f'There is no example named {name!r}.\n'
                        'To list examples: ploomber examples\n'
                        'To update local copy: ploomber examples -f')
@@ -259,6 +266,11 @@ def main(name, force=False, branch=None, output=None):
             tw.sep('=', f'Copying example {name!r} to {output}/', green=True)
 
             if Path(output).exists():
+                telemetry.log_api("examples_error",
+                                  metadata={
+                                      'type': 'duplicated_output',
+                                      'output_name': output
+                                  })
                 raise click.ClickException(
                     f"{output!r} already exists in the current working "
                     "directory, please rename it or move it "
@@ -279,3 +291,8 @@ def main(name, force=False, branch=None, output=None):
                      f'\n* conda env create -f environment.yml'
                      f'\n* pip install -r requirements.txt\n')
             tw.sep('=', blue=True)
+            telemetry.log_api("ploomber_examples",
+                              metadata={
+                                  'argv': sys.argv,
+                                  'example_name': name
+                              })
diff --git a/src/ploomber/cli/install.py b/src/ploomber/cli/install.py
index a55460357..031832988 100644
--- a/src/ploomber/cli/install.py
+++ b/src/ploomber/cli/install.py
@@ -45,29 +45,41 @@ def main(use_lock):
         err = ("Expected and environment.lock.yaml "
                "(conda) or requirements.lock.txt (pip) in the current "
                "directory. Add one of them and try again.")
-        telemetry.log_api("install-exception-lock",
-                          metadata={'Exception': err})
+        telemetry.log_api("install-error",
+                          metadata={
+                              'type': 'no_lock',
+                              'exception': err
+                          })
         raise exceptions.ClickException(err)
     elif not use_lock and not HAS_ENV_YML and not HAS_REQS_TXT:
         err = ("Expected an environment.yaml (conda)"
                " or requirements.txt (pip) in the current directory."
                " Add one of them and try again.")
-        telemetry.log_api("install-exception-requirements",
-                          metadata={'Exception': err})
+        telemetry.log_api("install-error",
+                          metadata={
+                              'type': 'no_env_requirements',
+                              'exception': err
+                          })
         raise exceptions.ClickException(err)
     elif (not HAS_CONDA and use_lock and HAS_ENV_LOCK_YML
           and not HAS_REQS_LOCK_TXT):
         err = ("Found env environment.lock.yaml "
                "but conda is not installed. Install conda or add a "
                "requirements.lock.txt to use pip instead")
-        telemetry.log_api("install-exception-conda",
-                          metadata={'Exception': err})
+        telemetry.log_api("install-error",
+                          metadata={
+                              'type': 'no_conda',
+                              'exception': err
+                          })
         raise exceptions.ClickException(err)
     elif not HAS_CONDA and not use_lock and HAS_ENV_YML and not HAS_REQS_TXT:
         err = ("Found environment.yaml but conda is not installed."
                " Install conda or add a requirements.txt to use pip instead")
-        telemetry.log_api("install-exception-conda2",
-                          metadata={'Exception': err})
+        telemetry.log_api("install-error",
+                          metadata={
+                              'type': 'no_conda2',
+                              'exception': err
+                          })
         raise exceptions.ClickException(err)
     elif HAS_CONDA and use_lock and HAS_ENV_LOCK_YML:
         main_conda(start_time, use_lock=True)
@@ -154,10 +166,16 @@ def main_conda(start_time, use_lock):
     current_env = Path(shutil.which('python')).parents[1].name
 
     if env_name == current_env:
-        raise RuntimeError(f'{env_yml} will create an environment '
-                           f'named {env_name!r}, which is the current active '
-                           'environment. Move to a different one and try '
-                           'again (e.g., "conda activate base")')
+        err = (f'{env_yml} will create an environment '
+               f'named {env_name!r}, which is the current active '
+               'environment. Move to a different one and try '
+               'again (e.g., "conda activate base")')
+        telemetry.log_api("install-error",
+                          metadata={
+                              'type': 'env_running_conflict',
+                              'exception': err
+                          })
+        raise RuntimeError(err)
 
     # get current installed envs
     conda = shutil.which('conda')
@@ -175,9 +193,15 @@ def main_conda(start_time, use_lock):
         ])
 
         if already_installed:
-            raise ValueError(f'Environment {env_name!r} already exists, '
-                             f'delete it and try again '
-                             f'(conda env remove --name {env_name})')
+            err = (f'Environment {env_name!r} already exists, '
+                   f'delete it and try again '
+                   f'(conda env remove --name {env_name})')
+            telemetry.log_api("install-error",
+                              metadata={
+                                  'type': 'duplicate_env',
+                                  'exception': err
+                              })
+            raise ValueError(err)
 
     pkg_manager = mamba if mamba else conda
     cmdr.run(pkg_manager,
@@ -225,11 +249,15 @@ def _find_conda_root(conda_bin):
         # on linux miniconda3, anaconda and miniconda
         if parent.name.lower() in {'miniconda3', 'miniconda', 'anaconda3'}:
             return parent
-
-    raise RuntimeError(
-        'Failed to locate conda root from '
-        f'directory: {str(conda_bin)!r}. Please submit an issue: '
-        'https://github.com/ploomber/ploomber/issues/new')
+    err = ('Failed to locate conda root from '
+           f'directory: {str(conda_bin)!r}. Please submit an issue: '
+           'https://github.com/ploomber/ploomber/issues/new')
+    telemetry.log_api("install-error",
+                      metadata={
+                          'type': 'no_conda_root',
+                          'exception': err
+                      })
+    raise RuntimeError(err)
 
 
 def _path_to_pip_in_env_with_name(conda_bin, env_name):
@@ -246,9 +274,14 @@ def _locate_pip_inside_conda(env_name):
 
     # this might happen if the environment does not contain python/pip
     if not Path(pip).exists():
-        raise FileNotFoundError(
-            f'Could not locate pip in environment {env_name!r}, make sure '
-            'it is included in your environment.yml and try again')
+        err = (f'Could not locate pip in environment {env_name!r}, make sure '
+               'it is included in your environment.yml and try again')
+        telemetry.log_api("install-error",
+                          metadata={
+                              'type': 'no_pip_env',
+                              'exception': err
+                          })
+        raise FileNotFoundError(err)
 
     return pip
 
diff --git a/src/ploomber/cli/interact.py b/src/ploomber/cli/interact.py
index 9b5c051e5..3635bfdcc 100644
--- a/src/ploomber/cli/interact.py
+++ b/src/ploomber/cli/interact.py
@@ -20,12 +20,20 @@ def main():
     try:
         dag.render()
     except Exception:
-        print('Your dag failed to render, but you can still inspect the '
-              'object to debug it.\n')
+        err = ('Your dag failed to render, but you can still inspect the '
+               'object to debug it.\n')
+        telemetry.log_api("interact_error",
+                          dag=dag,
+                          metadata={
+                              'type': 'dag_render_failed',
+                              'exception': err
+                          })
+        print(err)
 
     end_time = datetime.datetime.now()
     telemetry.log_api("ploomber_interact",
-                      total_runtime=str(end_time - start_time))
+                      total_runtime=str(end_time - start_time),
+                      dag=dag)
 
     # NOTE: do not use embed here, we must use start_ipython, see here:
     # https://github.com/ipython/ipython/issues/8918
diff --git a/src/ploomber/cli/nb.py b/src/ploomber/cli/nb.py
index 5d0347eb3..95785070f 100644
--- a/src/ploomber/cli/nb.py
+++ b/src/ploomber/cli/nb.py
@@ -1,11 +1,13 @@
 import shutil
 from pathlib import Path
 import stat
+import sys
 
 import click
 
 from ploomber.cli.parsers import CustomParser
 from ploomber.cli.io import command_endpoint
+from ploomber.telemetry import telemetry
 
 
 def _call_in_source(dag, method_name, message, kwargs=None):
@@ -124,8 +126,14 @@ def main():
         dag.render(show_progress=False)
 
     if loading_error:
-        raise RuntimeError('Could not run nb command: the DAG '
-                           'failed to load') from loading_error
+        err = ('Could not run nb command: the DAG ' 'failed to load')
+        telemetry.log_api("nb_error",
+                          metadata={
+                              'type': 'dag_load_failed',
+                              'exception': err + f' {loading_error}',
+                              'argv': sys.argv
+                          })
+        raise RuntimeError(err) from loading_error
 
     if args.format:
         new_paths = [
@@ -170,14 +178,20 @@ def main():
             'Paired notebooks',
             dict(base_path=args.pair),
         )
-        click.echo(f'Finshed pairing notebooks. Tip: add {args.pair!r} to '
+        click.echo(f'Finished pairing notebooks. Tip: add {args.pair!r} to '
                    'your .gitignore to keep your repository clean')
 
     if args.install_hook:
         if not Path('.git').is_dir():
-            raise NotADirectoryError(
-                'Expected a .git/ directory in the current working '
-                'directory. Run this from the repository root directory.')
+            err = ('Expected a .git/ directory in the current working '
+                   'directory. Run this from the repository root directory.')
+            telemetry.log_api("nb_error",
+                              metadata={
+                                  'type': 'no_git_config',
+                                  'exception': err,
+                                  'argv': sys.argv
+                              })
+            raise NotADirectoryError(err)
 
         parent = Path('.git', 'hooks')
         parent.mkdir(exist_ok=True)
@@ -194,3 +208,5 @@ def main():
     if args.uninstall_hook:
         _delete_hook(Path('.git', 'hooks', 'pre-commit'))
         _delete_hook(Path('.git', 'hooks', 'post-commit'))
+
+    telemetry.log_api("ploomber_nb", dag=dag, metadata={'argv': sys.argv})
diff --git a/src/ploomber/cli/parsers.py b/src/ploomber/cli/parsers.py
index 5659b5707..876926878 100644
--- a/src/ploomber/cli/parsers.py
+++ b/src/ploomber/cli/parsers.py
@@ -38,7 +38,6 @@ class CustomMutuallyExclusiveGroup(argparse._MutuallyExclusiveGroup):
     A subclass of argparse._MutuallyExclusiveGroup that determines whether
     the added args should go into the static of dynamic API
     """
-
     def add_argument(self, *args, **kwargs):
         if not self._container.finished_static_api:
             if (not self._container.in_context
@@ -66,7 +65,6 @@ class CustomParser(argparse.ArgumentParser):
     manager, only after this has happened, other arguments can be added using
     parser.add_argument.
     """
-
     def __init__(self, *args, **kwargs):
         self.DEFAULT_ENTRY_POINT = default.try_to_find_entry_point()
 
diff --git a/src/ploomber/cli/plot.py b/src/ploomber/cli/plot.py
index f7d7f103b..0a718e623 100644
--- a/src/ploomber/cli/plot.py
+++ b/src/ploomber/cli/plot.py
@@ -1,3 +1,5 @@
+import sys
+
 from ploomber.cli.parsers import CustomParser
 from ploomber.cli.io import cli_endpoint
 from ploomber.util.default import extract_name
@@ -27,5 +29,7 @@ def main():
     dag.plot(output=output)
     end_time = datetime.datetime.now()
     telemetry.log_api("ploomber_plot",
-                      total_runtime=str(end_time - start_time))
+                      total_runtime=str(end_time - start_time),
+                      dag=dag,
+                      metadata={'argv': sys.argv})
     print('Plot saved at:', output)
diff --git a/src/ploomber/cli/report.py b/src/ploomber/cli/report.py
index ca4c18cc7..90a72a331 100644
--- a/src/ploomber/cli/report.py
+++ b/src/ploomber/cli/report.py
@@ -1,3 +1,5 @@
+import sys
+
 from ploomber.cli.parsers import CustomParser
 from ploomber.cli.io import cli_endpoint
 from ploomber.telemetry import telemetry
@@ -19,5 +21,7 @@ def main():
     dag.to_markup(path=args.output)
     end_time = datetime.datetime.now()
     telemetry.log_api("ploomber_report",
-                      total_runtime=str(end_time - start_time))
+                      total_runtime=str(end_time - start_time),
+                      dag=dag,
+                      metadata={'argv': sys.argv})
     print('Report saved at:', args.output)
diff --git a/src/ploomber/cli/status.py b/src/ploomber/cli/status.py
index 5c5e4f83f..ce94a2714 100644
--- a/src/ploomber/cli/status.py
+++ b/src/ploomber/cli/status.py
@@ -16,4 +16,5 @@ def main():
     print(dag.status())
     end_time = datetime.datetime.now()
     telemetry.log_api("ploomber_status",
-                      total_runtime=str(end_time - start_time))
+                      total_runtime=str(end_time - start_time),
+                      dag=dag)
diff --git a/src/ploomber/cli/task.py b/src/ploomber/cli/task.py
index 3104cf257..20fd622ce 100644
--- a/src/ploomber/cli/task.py
+++ b/src/ploomber/cli/task.py
@@ -1,3 +1,5 @@
+import sys
+
 from ploomber.cli.parsers import CustomParser
 from ploomber.cli.io import cli_endpoint
 from ploomber.telemetry import telemetry
@@ -57,4 +59,6 @@ def main():
 
     end_time = datetime.datetime.now()
     telemetry.log_api("ploomber_task",
-                      total_runtime=str(end_time - start_time))
+                      total_runtime=str(end_time - start_time),
+                      dag=dag,
+                      metadata={'argv': sys.argv})
diff --git a/src/ploomber/telemetry/telemetry.py b/src/ploomber/telemetry/telemetry.py
index 00c945900..b717582eb 100644
--- a/src/ploomber/telemetry/telemetry.py
+++ b/src/ploomber/telemetry/telemetry.py
@@ -31,17 +31,21 @@
 
 import datetime
 import http.client as httplib
+import warnings
+
 import posthog
 import yaml
 import os
 from pathlib import Path
 import sys
 import uuid
+
 from ploomber.telemetry import validate_inputs
 from ploomber import __version__
 import platform
+import distro
 
-TELEMETRY_VERSION = '0.2'
+TELEMETRY_VERSION = '0.3'
 DEFAULT_HOME_DIR = '~/.ploomber'
 CONF_DIR = "stats"
 posthog.project_api_key = 'phc_P9SpSeypyPwxrMdFn2edOOEooQioF2axppyEeDwtMSP'
@@ -69,11 +73,14 @@ def is_online():
 
 # Will output if the code is within a container
 def is_docker():
-    cgroup = Path('/proc/self/cgroup')
-    docker_env = Path('/.dockerenv')
-    return (docker_env.exists() or cgroup.exists()
-            and any('docker' in line
-                    for line in cgroup.read_text().splitlines()))
+    try:
+        cgroup = Path('/proc/self/cgroup')
+        docker_env = Path('/.dockerenv')
+        return (docker_env.exists() or cgroup.exists()
+                and any('docker' in line
+                        for line in cgroup.read_text().splitlines()))
+    except OSError:
+        return False
 
 
 def get_os():
@@ -87,6 +94,36 @@ def get_os():
         return os
 
 
+def test():
+    """
+    Returns:
+        A dict of system information.
+    """
+    os = platform.system()
+    if os == "Darwin":
+        return {"os": "mac", "mac_version": platform.mac_ver()[0]}
+
+    if os == "Windows":
+        release, version, csd, platform_type = platform.win32_ver()
+        return {
+            "os": "windows",
+            "windows_version_release": release,
+            "windows_version": version,
+            "windows_version_service_pack": csd,
+            "windows_version_os_type": platform_type,
+        }
+
+    if os == "Linux":
+        return {
+            "os": "linux",
+            "linux_distro": distro.id(),
+            "linux_distro_like": distro.like(),
+            "linux_distro_version": distro.version(),
+        }
+
+    return {"os": os}
+
+
 def is_conda():
     """
     The function will tell if the code is running in a conda env
@@ -100,8 +137,8 @@ def get_base_prefix_compat():
     """
     This function will find the pip virtualenv with different python versions.
     Get base/real prefix, or sys.prefix if there is none."""
-    return getattr(sys, "base_prefix", None) or sys.prefix or \
-        getattr(sys, "real_prefix", None)
+    return getattr(sys, "base_prefix", None) or sys.prefix or getattr(
+        sys, "real_prefix", None)
 
 
 def in_virtualenv():
@@ -109,6 +146,7 @@ def in_virtualenv():
 
 
 def get_env():
+    """Returns: The name of the virtual env if exists as str"""
     if in_virtualenv():
         return 'pip'
     elif is_conda():
@@ -117,20 +155,88 @@ def get_env():
         return 'local'
 
 
+def is_colab():
+    """Returns: True for Google Colab env"""
+    return "COLAB_GPU" in os.environ
+
+
+def is_paperspace():
+    """Returns: True for Paperspace env"""
+    return "PS_API_KEY" in os.environ or\
+           "PAPERSPACE_API_KEY" in os.environ or\
+           "PAPERSPACE_NOTEBOOK_REPO_ID" in os.environ
+
+
+def is_slurm():
+    """Returns: True for Slurm env"""
+    return "SLURM_JOB_ID" in os.environ
+
+
+def is_airflow():
+    """Returns: True for Airflow env"""
+    return "AIRFLOW_CONFIG" in os.environ or "AIRFLOW_HOME" in os.environ
+
+
+def is_argo():
+    """Returns: True for Airflow env"""
+    return "ARGO_AGENT_TASK_WORKERS" in os.environ or \
+           "ARGO_KUBELET_PORT" in os.environ
+
+
+def clean_tasks_upstream_products(input):
+    clean_input = {}
+    try:
+        product_items = input.items()
+        for product_item_name, product_item in product_items:
+            clean_input[product_item_name] = str(product_item).split("/")[-1]
+    except AttributeError:  # Single product
+        return str(input.split("/")[-1])
+
+    return clean_input
+
+
+def parse_dag(dag):
+    try:
+        dag_dict = {}
+        dag_dict["dag_size"] = len(dag)
+        tasks_list = list(dag)
+        if tasks_list:
+            dag_dict["tasks"] = {}
+            for task in tasks_list:
+                task_dict = {}
+                task_dict["status"] = dag[task]._exec_status.name
+                task_dict["type"] = str(type(
+                    dag[task])).split(".")[-1].split("'")[0]
+                task_dict["upstream"] = clean_tasks_upstream_products(
+                    dag[task].upstream)
+                task_dict["products"] = clean_tasks_upstream_products(
+                    dag[task].product.to_json_serializable())
+                dag_dict['tasks'][task] = task_dict
+
+        return dag_dict
+    except Exception:
+        return None
+
+
+def get_home_dir():
+    """
+    Checks if ploomber home was set through the env variable.
+    returns the actual home_dir path.
+    """
+    return PLOOMBER_HOME_DIR if PLOOMBER_HOME_DIR else DEFAULT_HOME_DIR
+
+
 def check_dir_exist(input_location=None):
     """
     Checks if a specific directory exists, creates if not.
     In case the user didn't set a custom dir, will turn to the default home
     """
-    if PLOOMBER_HOME_DIR:
-        final_location = PLOOMBER_HOME_DIR
-    else:
-        final_location = DEFAULT_HOME_DIR
+    home_dir = get_home_dir()
 
     if input_location:
-        p = Path(final_location, input_location)
+        p = Path(home_dir, input_location)
     else:
-        p = Path(final_location)
+        p = Path(home_dir)
 
     p = p.expanduser()
 
@@ -151,15 +257,17 @@ def check_uid():
             with uid_path.open("w") as file:
                 yaml.dump({"uid": uid}, file)
             return uid
-        except FileNotFoundError as e:
-            return f"ERROR: Can't read UID file: {e}"
+        except Exception as e:
+            warnings.warn(f"ERROR: Can't write UID file: {e}")
+            return f"NO_UID {e}"
     else:  # read and return uid
         try:
             with uid_path.open("r") as file:
                 uid_dict = yaml.safe_load(file)
             return uid_dict['uid']
-        except FileNotFoundError as e:
-            return f"Error: Can't read UID file: {e}"
+        except Exception as e:
+            warnings.warn(f"Error: Can't read UID file: {e}")
+            return f"NO_UID {e}"
 
 
 def check_stats_enabled():
@@ -179,15 +287,17 @@ def check_stats_enabled():
             with config_path.open("w") as file:
                 yaml.dump({"stats_enabled": True}, file)
             return True
-        except FileNotFoundError as e:
-            return f"ERROR: Can't read file: {e}"
+        except Exception as e:
+            warnings.warn(f"ERROR: Can't write to config file: {e}")
+            return True
     else:  # read and return config
         try:
             with config_path.open("r") as file:
                 conf = yaml.safe_load(file)
             return conf['stats_enabled']
-        except FileNotFoundError as e:
-            return f"Error: Can't read config file {e}"
+        except Exception as e:
+            warnings.warn(f"Error: Can't read config file {e}")
+            return True
 
 
 def check_first_time_usage():
@@ -230,7 +340,11 @@ def validate_entries(event_id, uid, action, client_time, total_runtime):
     return event_id, uid, action, client_time, elapsed_time
 
 
-def log_api(action, client_time=None, total_runtime=None, metadata=None):
+def log_api(action,
+            client_time=None,
+            total_runtime=None,
+            dag=None,
+            metadata={}):
     """
     This function logs through an API call, assigns parameters if missing like
     timestamp, event id and stats information.
@@ -240,9 +354,34 @@ def log_api(action, client_time=None, total_runtime=None, metadata=None):
         client_time = datetime.datetime.now()
 
     (telemetry_enabled, uid, is_install) = _get_telemetry_info()
+    if 'NO_UID' in uid:
+        metadata['uid_issue'] = uid
+        uid = None
+
     py_version = python_version()
     docker_container = is_docker()
-    operating_system = get_os()
+    colab = is_colab()
+    if colab:
+        metadata['colab'] = colab
+
+    paperspace = is_paperspace()
+    if paperspace:
+        metadata['paperspace'] = paperspace
+
+    slurm = is_slurm()
+    if slurm:
+        metadata['slurm'] = slurm
+
+    airflow = is_airflow()
+    if airflow:
+        metadata['airflow'] = airflow
+
+    argo = is_argo()
+    if argo:
+        metadata['argo'] = argo
+    if dag:
+        metadata['dag'] = parse_dag(dag)
+    os = get_os()
     product_version = __version__
     online = is_online()
     environment = get_env()
@@ -254,39 +393,25 @@ def log_api(action, client_time=None, total_runtime=None, metadata=None):
                                action,
                                client_time,
                                total_runtime)
+        props = {
+            'event_id': event_id,
+            'user_id': uid,
+            'action': action,
+            'client_time': str(client_time),
+            'metadata': metadata,
+            'total_runtime': total_runtime,
+            'python_version': py_version,
+            'ploomber_version': product_version,
+            'docker_container': docker_container,
+            'os': os,
+            'environment': environment,
+            'metadata': metadata,
+            'telemetry_version': TELEMETRY_VERSION
+        }
+
         if is_install:
             posthog.capture(distinct_id=uid,
                             event='install_success_indirect',
-                            properties={
-                                'event_id': event_id,
-                                'user_id': uid,
-                                'action': action,
-                                'client_time': str(client_time),
-                                'metadata': metadata,
-                                'total_runtime': total_runtime,
-                                'python_version': py_version,
-                                'ploomber_version': product_version,
-                                'docker_container': docker_container,
-                                'operating_system': operating_system,
-                                'environment': environment,
-                                'metadata': metadata,
-                                'telemetry_version': TELEMETRY_VERSION
-                            })
-
-        posthog.capture(distinct_id=uid,
-                        event=action,
-                        properties={
-                            'event_id': event_id,
-                            'user_id': uid,
-                            'action': action,
-                            'client_time': str(client_time),
-                            'metadata': metadata,
-                            'total_runtime': total_runtime,
-                            'python_version': py_version,
-                            'ploomber_version': product_version,
-                            'docker_container': docker_container,
-                            'operating_system': operating_system,
-                            'environment': environment,
-                            'metadata': metadata,
-                            'telemetry_version': TELEMETRY_VERSION
-                        })
+                            properties=props)
+
+        posthog.capture(distinct_id=uid, event=action, properties=props)
diff --git a/tests/telemetry/test_telemetry.py b/tests/telemetry/test_telemetry.py
index 7d76bdea3..eaf3c320f 100644
--- a/tests/telemetry/test_telemetry.py
+++ b/tests/telemetry/test_telemetry.py
@@ -8,6 +8,8 @@
 from ploomber.telemetry.validate_inputs import str_param, opt_str_param
 from ploomber.cli import plot, install, build, interact, task, report, status
 import ploomber.dag.dag as dag_module
+from ploomber.tasks.tasks import PythonCallable
+from ploomber.products.file import File
 from conftest import _write_sample_conda_env, _prepare_files
 
 
@@ -146,6 +148,41 @@ def mock(input_path):
     assert docker is True
 
 
+# Ref https://stackoverflow.com/questions/53581278/test-if-
+# notebook-is-running-on-google-colab
+def test_colab_env(monkeypatch):
+    monkeypatch.setenv('COLAB_GPU', True)
+    colab = telemetry.is_colab()
+    assert colab is True
+
+
+# Ref https://learn.paperspace.com/video/creating-a-jupyter-notebook
+@pytest.mark.parametrize(
+    'env_variable',
+    ['PS_API_KEY', 'PAPERSPACE_API_KEY', 'PAPERSPACE_NOTEBOOK_REPO_ID'])
+def test_paperspace_env(monkeypatch, env_variable):
+    monkeypatch.setenv(env_variable, True)
+    pspace = telemetry.is_paperspace()
+    assert pspace is True
+
+
+# Ref https://stackoverflow.com/questions/63298054/how-to-check-if-my-code
+# -runs-inside-a-slurm-environment
+def test_slurm_env(monkeypatch):
+    monkeypatch.setenv('SLURM_JOB_ID', True)
+    slurm = telemetry.is_slurm()
+    assert slurm is True
+
+
+# Ref https://airflow.apache.org/docs/apache-airflow/stable/
+# cli-and-env-variables-ref.html?highlight=airflow_home#envvar-AIRFLOW_HOME
+@pytest.mark.parametrize('env_variable', ['AIRFLOW_CONFIG', 'AIRFLOW_HOME'])
+def test_airflow_env(monkeypatch, env_variable):
+    monkeypatch.setenv(env_variable, True)
+    airflow = telemetry.is_airflow()
+    assert airflow is True
+
+
 # Ref https://stackoverflow.com/questions/110362/how-can-i-find-
 # the-current-os-in-python
 @pytest.mark.parametrize('os_param', ['Windows', 'Linux', 'MacOS', 'Ubuntu'])
@@ -333,6 +370,75 @@ def test_is_online():
     assert telemetry.is_online()
 
 
+def test_parse_dag_products(monkeypatch):
+    product = '/ml-basic/output/get.parquet'
+    dag = telemetry.clean_tasks_upstream_products(product)
+    assert dag == 'get.parquet'
+
+    products = {
+        'nb': '/spec-api-python/output/get.ipynb',
+        'data': '//spec-api-python/output/data.csv'
+    }
+    dag = telemetry.clean_tasks_upstream_products(products)
+    assert dag == {'nb': 'get.ipynb', 'data': 'data.csv'}
+
+    dag = telemetry.clean_tasks_upstream_products({})
+    assert dag == {}
+
+
+def test_parse_dag(monkeypatch, tmp_directory):
+    def fn1(product):
+        pass
+
+    def fn2(upstream, product):
+        pass
+
+    dag = dag_module.DAG()
+    t1 = PythonCallable(fn1,
+                        File('/home/ido/filepath/file1.txt'),
+                        dag,
+                        name='first')
+    t2 = PythonCallable(fn2,
+                        File('/home/ido/filepath/file2.txt'),
+                        dag,
+                        name='second')
+    t3 = PythonCallable(fn2,
+                        File('/home/ido/filepath/file3.parquet'),
+                        dag,
+                        name='third')
+    t1 >> t2
+    t3 >> t2
+
+    res = telemetry.parse_dag(dag)
+    assert res['dag_size'] == 3
+    assert res == {
+        'dag_size': 3,
+        'tasks': {
+            'first': {
+                'status': 'WaitingRender',
+                'type': 'PythonCallable',
+                'upstream': {},
+                'products': 'file1.txt'
+            },
+            'third': {
+                'status': 'WaitingRender',
+                'type': 'PythonCallable',
+                'upstream': {},
+                'products': 'file3.parquet'
+            },
+            'second': {
+                'status': 'WaitingRender',
+                'type': 'PythonCallable',
+                'upstream': {
+                    'first': 'file1.txt',
+                    'third': 'file3.parquet'
+                },
+                'products': 'file2.txt'
+            }
+        }
+    }
+
+
 def test_is_not_online(monkeypatch):
     mock_httplib = Mock()
     mock_httplib.HTTPSConnection().request.side_effect = Exception