diff --git a/kedro-telemetry/RELEASE.md b/kedro-telemetry/RELEASE.md index d3dd74d5a..a4201d32e 100644 --- a/kedro-telemetry/RELEASE.md +++ b/kedro-telemetry/RELEASE.md @@ -1,4 +1,5 @@ -# Upcoming release +# Upcoming release 0.4.0 +* Updated the plugin to generate a unique UUID for each user of `kedro-telemetry`. # Release 0.3.2 * Updated plugin to share if a project is being run in a ci environment. diff --git a/kedro-telemetry/kedro_telemetry/plugin.py b/kedro-telemetry/kedro_telemetry/plugin.py index ff44d4f1d..4e8c90401 100644 --- a/kedro-telemetry/kedro_telemetry/plugin.py +++ b/kedro-telemetry/kedro_telemetry/plugin.py @@ -1,11 +1,11 @@ """Kedro Telemetry plugin for collecting Kedro usage data.""" -import getpass import hashlib import json import logging import os import sys +import uuid from copy import deepcopy from datetime import datetime from pathlib import Path @@ -15,6 +15,7 @@ import requests import toml import yaml +from appdirs import user_config_dir from kedro import __version__ as KEDRO_VERSION from kedro.framework.cli.cli import KedroCLI from kedro.framework.cli.hooks import cli_hook_impl @@ -41,6 +42,7 @@ "BUILDKITE", # https://buildkite.com/docs/pipelines/environment-variables } TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" +CONFIG_FILENAME = "telemetry.toml" logger = logging.getLogger(__name__) @@ -49,15 +51,45 @@ def _hash(string: str) -> str: return hashlib.sha512(bytes(string, encoding="utf8")).hexdigest() -def _get_hashed_username(): +def _get_or_create_uuid() -> str: + """ + Reads a UUID from a configuration file or generates and saves a new one if not present. + """ + config_path = user_config_dir("kedro") + full_path = os.path.join(config_path, CONFIG_FILENAME) + try: - username = getpass.getuser() - return _hash(username) - except Exception as exc: - logger.warning( - "Something went wrong with getting the username. 
Exception: %s", - exc, - ) + if os.path.exists(full_path): + with open(full_path) as f: + config = toml.load(f) + + if "telemetry" in config and "uuid" in config["telemetry"]: + return uuid.UUID(config["telemetry"]["uuid"]).hex + + # Generate a new UUID and save it to the config file + new_uuid = _generate_new_uuid(full_path) + + return new_uuid + + except Exception as e: + logging.error(f"Failed to retrieve UUID: {e}") + return "" + + +def _generate_new_uuid(full_path: str) -> str: + try: + config = {} + config["telemetry"] = {} + new_uuid = uuid.uuid4().hex + config["telemetry"]["uuid"] = new_uuid + + os.makedirs(os.path.dirname(full_path), exist_ok=True) + with open(full_path, "w") as f: + toml.dump(config, f) + + return new_uuid + except Exception as e: + logging.error(f"Failed to create UUID: {e}") return "" @@ -82,7 +114,7 @@ def before_command_run( return # get KedroCLI and its structure from actual project root - cli = KedroCLI(project_path=Path.cwd()) + cli = KedroCLI(project_path=project_metadata.project_path) cli_struct = _get_cli_structure(cli_obj=cli, get_help=False) masked_command_args = _mask_kedro_cli( cli_struct=cli_struct, command_args=command_args @@ -90,9 +122,9 @@ def before_command_run( main_command = masked_command_args[0] if masked_command_args else "kedro" logger.debug("You have opted into product usage analytics.") - hashed_username = _get_hashed_username() + user_uuid = _get_or_create_uuid() project_properties = _get_project_properties( - hashed_username, project_metadata.project_path + user_uuid, project_metadata.project_path ) cli_properties = _format_user_cli_data( project_properties, masked_command_args @@ -100,7 +132,7 @@ def before_command_run( _send_heap_event( event_name=f"Command run: {main_command}", - identity=hashed_username, + identity=user_uuid, properties=cli_properties, ) @@ -109,7 +141,7 @@ def before_command_run( generic_properties["main_command"] = main_command _send_heap_event( event_name="CLI command", - 
identity=hashed_username, + identity=user_uuid, properties=generic_properties, ) except Exception as exc: @@ -141,16 +173,16 @@ def after_catalog_created(self, catalog): logger.debug("You have opted into product usage analytics.") default_pipeline = pipelines.get("__default__") # __default__ - hashed_username = _get_hashed_username() + user_uuid = _get_or_create_uuid() - project_properties = _get_project_properties(hashed_username, self.project_path) + project_properties = _get_project_properties(user_uuid, self.project_path) project_statistics_properties = _format_project_statistics_data( project_properties, catalog, default_pipeline, pipelines ) _send_heap_event( event_name="Kedro Project Statistics", - identity=hashed_username, + identity=user_uuid, properties=project_statistics_properties, ) @@ -163,10 +195,10 @@ def _is_known_ci_env(known_ci_env_var_keys=KNOWN_CI_ENV_VAR_KEYS): return any(os.getenv(key) for key in known_ci_env_var_keys) -def _get_project_properties(hashed_username: str, project_path: str) -> Dict: +def _get_project_properties(user_uuid: str, project_path: str) -> Dict: hashed_package_name = _hash(PACKAGE_NAME) if PACKAGE_NAME else "undefined" properties = { - "username": hashed_username, + "username": user_uuid, "package_name": hashed_package_name, "project_version": KEDRO_VERSION, "telemetry_version": TELEMETRY_VERSION, diff --git a/kedro-telemetry/pyproject.toml b/kedro-telemetry/pyproject.toml index 9ab6933ac..16c4449d1 100644 --- a/kedro-telemetry/pyproject.toml +++ b/kedro-telemetry/pyproject.toml @@ -13,6 +13,7 @@ license = {text = "Apache Software License (Apache 2.0)"} dependencies = [ "kedro>=0.18.0", "requests~=2.20", + "appdirs>=1.4.4", ] dynamic = ["readme", "version"] diff --git a/kedro-telemetry/tests/test_plugin.py b/kedro-telemetry/tests/test_plugin.py index 6a3ee2cc5..96bd89bdd 100644 --- a/kedro-telemetry/tests/test_plugin.py +++ b/kedro-telemetry/tests/test_plugin.py @@ -131,8 +131,8 @@ def test_before_command_run(self, 
mocker, fake_metadata): mocked_anon_id.return_value = "digested" mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights") mocker.patch( - "kedro_telemetry.plugin._get_hashed_username", - return_value="hashed_username", + "kedro_telemetry.plugin._get_or_create_uuid", + return_value="user_uuid", ) mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event") @@ -140,7 +140,7 @@ def test_before_command_run(self, mocker, fake_metadata): command_args = ["--version"] telemetry_hook.before_command_run(fake_metadata, command_args) expected_properties = { - "username": "hashed_username", + "username": "user_uuid", "package_name": "digested", "project_version": kedro_version, "telemetry_version": TELEMETRY_VERSION, @@ -157,12 +157,12 @@ def test_before_command_run(self, mocker, fake_metadata): expected_calls = [ mocker.call( event_name="Command run: --version", - identity="hashed_username", + identity="user_uuid", properties=expected_properties, ), mocker.call( event_name="CLI command", - identity="hashed_username", + identity="user_uuid", properties=generic_properties, ), ] @@ -177,8 +177,8 @@ def test_before_command_run_with_tools(self, mocker, fake_metadata): mocked_anon_id.return_value = "digested" mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights") mocker.patch( - "kedro_telemetry.plugin._get_hashed_username", - return_value="hashed_username", + "kedro_telemetry.plugin._get_or_create_uuid", + return_value="user_uuid", ) mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event") @@ -188,7 +188,7 @@ def test_before_command_run_with_tools(self, mocker, fake_metadata): command_args = ["--version"] telemetry_hook.before_command_run(fake_metadata, command_args) expected_properties = { - "username": "hashed_username", + "username": "user_uuid", "package_name": "digested", "project_version": kedro_version, "telemetry_version": TELEMETRY_VERSION, @@ -207,12 +207,12 @@ def test_before_command_run_with_tools(self, mocker, 
fake_metadata): expected_calls = [ mocker.call( event_name="Command run: --version", - identity="hashed_username", + identity="user_uuid", properties=expected_properties, ), mocker.call( event_name="CLI command", - identity="hashed_username", + identity="user_uuid", properties=generic_properties, ), ] @@ -226,13 +226,17 @@ def test_before_command_run_empty_args(self, mocker, fake_metadata): mocked_anon_id = mocker.patch("kedro_telemetry.plugin._hash") mocked_anon_id.return_value = "digested" mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights") + mocker.patch( + "kedro_telemetry.plugin._get_or_create_uuid", + return_value="user_uuid", + ) mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event") telemetry_hook = KedroTelemetryCLIHooks() command_args = [] telemetry_hook.before_command_run(fake_metadata, command_args) expected_properties = { - "username": "digested", + "username": "user_uuid", "package_name": "digested", "project_version": kedro_version, "telemetry_version": TELEMETRY_VERSION, @@ -249,12 +253,12 @@ def test_before_command_run_empty_args(self, mocker, fake_metadata): expected_calls = [ mocker.call( event_name="Command run: kedro", - identity="digested", + identity="user_uuid", properties=expected_properties, ), mocker.call( event_name="CLI command", - identity="digested", + identity="user_uuid", properties=generic_properties, ), ] @@ -296,7 +300,7 @@ def test_before_command_run_anonymous(self, mocker, fake_metadata): mocked_anon_id = mocker.patch("kedro_telemetry.plugin._hash") mocked_anon_id.return_value = "digested" mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights") - mocker.patch("getpass.getuser", side_effect=Exception) + mocker.patch("builtins.open", side_effect=Exception) mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event") telemetry_hook = KedroTelemetryCLIHooks() @@ -474,8 +478,8 @@ def test_after_context_created_without_kedro_run( # noqa: PLR0913 
mocker.patch("kedro_telemetry.plugin._hash", return_value="digested") mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights") mocker.patch( - "kedro_telemetry.plugin._get_hashed_username", - return_value="hashed_username", + "kedro_telemetry.plugin._get_or_create_uuid", + return_value="user_uuid", ) mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event") mocker.patch("kedro_telemetry.plugin.open") @@ -487,7 +491,7 @@ def test_after_context_created_without_kedro_run( # noqa: PLR0913 telemetry_hook.after_catalog_created(fake_catalog) project_properties = { - "username": "hashed_username", + "username": "user_uuid", "package_name": "digested", "project_version": kedro_version, "telemetry_version": TELEMETRY_VERSION, @@ -504,7 +508,7 @@ def test_after_context_created_without_kedro_run( # noqa: PLR0913 expected_call = mocker.call( event_name="Kedro Project Statistics", - identity="hashed_username", + identity="user_uuid", properties=expected_properties, ) @@ -530,8 +534,8 @@ def test_after_context_created_with_kedro_run( # noqa: PLR0913 mocker.patch("kedro_telemetry.plugin._hash", return_value="digested") mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights") mocker.patch( - "kedro_telemetry.plugin._get_hashed_username", - return_value="hashed_username", + "kedro_telemetry.plugin._get_or_create_uuid", + return_value="user_uuid", ) mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event") mocker.patch("kedro_telemetry.plugin.toml.load") @@ -546,7 +550,7 @@ def test_after_context_created_with_kedro_run( # noqa: PLR0913 telemetry_hook.after_catalog_created(fake_catalog) project_properties = { - "username": "hashed_username", + "username": "user_uuid", "package_name": "digested", "project_version": kedro_version, "telemetry_version": TELEMETRY_VERSION, @@ -563,7 +567,7 @@ def test_after_context_created_with_kedro_run( # noqa: PLR0913 expected_call = mocker.call( event_name="Kedro Project Statistics", - 
identity="hashed_username", + identity="user_uuid", properties=expected_properties, ) @@ -589,8 +593,8 @@ def test_after_context_created_with_kedro_run_and_tools( # noqa: PLR0913 mocker.patch("kedro_telemetry.plugin._hash", return_value="digested") mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights") mocker.patch( - "kedro_telemetry.plugin._get_hashed_username", - return_value="hashed_username", + "kedro_telemetry.plugin._get_or_create_uuid", + return_value="user_uuid", ) mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event") mocker.patch("builtins.open", mocker.mock_open(read_data=MOCK_PYPROJECT_TOOLS)) @@ -607,7 +611,7 @@ def test_after_context_created_with_kedro_run_and_tools( # noqa: PLR0913 telemetry_hook.after_catalog_created(fake_catalog) project_properties = { - "username": "hashed_username", + "username": "user_uuid", "package_name": "digested", "project_version": kedro_version, "telemetry_version": TELEMETRY_VERSION, @@ -626,7 +630,7 @@ def test_after_context_created_with_kedro_run_and_tools( # noqa: PLR0913 expected_call = mocker.call( event_name="Kedro Project Statistics", - identity="hashed_username", + identity="user_uuid", properties=expected_properties, )