Skip to content

Commit

Permalink
feat(telemetry): Unique User IDs in kedro-telemetry - merge only for …
Browse files Browse the repository at this point in the history
…kedro-telemetry release 0.4.0 (#596)

* Change to unique uuid

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>

* Fix tests

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>

* fix tests and empty value

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>

* fix lint

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>

* Reformat _get_or_create_uuid to not be entirely inside the try block

Signed-off-by: lrcouto <laurarccouto@gmail.com>

* Reformat _get_or_create_uuid to not be entirely inside the try block

Signed-off-by: lrcouto <laurarccouto@gmail.com>

* Remove incorrectly sent file

Signed-off-by: lrcouto <laurarccouto@gmail.com>

* Update RELEASE.md

Signed-off-by: lrcouto <laurarccouto@gmail.com>

* Return empty ID if the existing ID cannot be read

Signed-off-by: lrcouto <laurarccouto@gmail.com>

* Extract new uuid generation to its own function

Signed-off-by: lrcouto <laurarccouto@gmail.com>

* Convert .conf file to .toml

Signed-off-by: lrcouto <laurarccouto@gmail.com>

* Lint

Signed-off-by: lrcouto <laurarccouto@gmail.com>

* Remove redundant try/except block

Signed-off-by: lrcouto <laurarccouto@gmail.com>

* Add type hints, change variable names to be more descriptive

Signed-off-by: lrcouto <laurarccouto@gmail.com>

* Small fixes

Signed-off-by: lrcouto <laurarccouto@gmail.com>

* Make release note more explicit

Signed-off-by: lrcouto <laurarccouto@gmail.com>

---------

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>
Signed-off-by: lrcouto <laurarccouto@gmail.com>
Co-authored-by: lrcouto <laurarccouto@gmail.com>
Co-authored-by: L. R. Couto <57910428+lrcouto@users.noreply.github.com>
  • Loading branch information
3 people committed Mar 18, 2024
1 parent 2cbb71e commit 846db2b
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 46 deletions.
3 changes: 2 additions & 1 deletion kedro-telemetry/RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Upcoming release
# Upcoming release 0.4.0
* Updated the plugin to generate a unique UUID for each user of `kedro-telemetry`.

# Release 0.3.2
* Updated the plugin to share whether a project is being run in a CI environment.
Expand Down
70 changes: 51 additions & 19 deletions kedro-telemetry/kedro_telemetry/plugin.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""Kedro Telemetry plugin for collecting Kedro usage data."""

import getpass
import hashlib
import json
import logging
import os
import sys
import uuid
from copy import deepcopy
from datetime import datetime
from pathlib import Path
Expand All @@ -15,6 +15,7 @@
import requests
import toml
import yaml
from appdirs import user_config_dir
from kedro import __version__ as KEDRO_VERSION
from kedro.framework.cli.cli import KedroCLI
from kedro.framework.cli.hooks import cli_hook_impl
Expand All @@ -41,6 +42,7 @@
"BUILDKITE", # https://buildkite.com/docs/pipelines/environment-variables
}
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
CONFIG_FILENAME = "telemetry.toml"

logger = logging.getLogger(__name__)

Expand All @@ -49,15 +51,45 @@ def _hash(string: str) -> str:
return hashlib.sha512(bytes(string, encoding="utf8")).hexdigest()


def _get_hashed_username():
def _get_or_create_uuid() -> str:
"""
Reads a UUID from a configuration file or generates and saves a new one if not present.
"""
config_path = user_config_dir("kedro")
full_path = os.path.join(config_path, CONFIG_FILENAME)

try:
username = getpass.getuser()
return _hash(username)
except Exception as exc:
logger.warning(
"Something went wrong with getting the username. Exception: %s",
exc,
)
if os.path.exists(full_path):
with open(full_path) as f:
config = toml.load(f)

if "telemetry" in config and "uuid" in config["telemetry"]:
return uuid.UUID(config["telemetry"]["uuid"]).hex

# Generate a new UUID and save it to the config file
new_uuid = _generate_new_uuid(full_path)

return new_uuid

except Exception as e:
logging.error(f"Failed to retrieve UUID: {e}")
return ""


def _generate_new_uuid(full_path: str) -> str:
    """Generate a new UUID and persist it to the telemetry config file.

    The UUID is written as ``uuid`` under a ``[telemetry]`` table in a TOML
    file at ``full_path``, replacing any existing content of that file.

    Args:
        full_path: Path of the TOML config file to write. Missing parent
            directories are created.

    Returns:
        The new UUID as a hex string, or an empty string if the UUID could
        not be generated or saved.
    """
    try:
        new_uuid = uuid.uuid4().hex
        config = {"telemetry": {"uuid": new_uuid}}

        # A bare filename has an empty dirname, and os.makedirs("") raises;
        # only create the parent directory when there is one.
        parent_dir = os.path.dirname(full_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # TOML documents are UTF-8; do not depend on the platform default.
        with open(full_path, "w", encoding="utf-8") as f:
            toml.dump(config, f)

        return new_uuid
    except Exception as e:
        # Best-effort: telemetry must never break the user's command, so
        # report through the module logger and fall back to an empty ID.
        logger.error("Failed to create UUID: %s", e)
        return ""


Expand All @@ -82,25 +114,25 @@ def before_command_run(
return

# get KedroCLI and its structure from actual project root
cli = KedroCLI(project_path=Path.cwd())
cli = KedroCLI(project_path=project_metadata.project_path)
cli_struct = _get_cli_structure(cli_obj=cli, get_help=False)
masked_command_args = _mask_kedro_cli(
cli_struct=cli_struct, command_args=command_args
)
main_command = masked_command_args[0] if masked_command_args else "kedro"

logger.debug("You have opted into product usage analytics.")
hashed_username = _get_hashed_username()
user_uuid = _get_or_create_uuid()
project_properties = _get_project_properties(
hashed_username, project_metadata.project_path
user_uuid, project_metadata.project_path
)
cli_properties = _format_user_cli_data(
project_properties, masked_command_args
)

_send_heap_event(
event_name=f"Command run: {main_command}",
identity=hashed_username,
identity=user_uuid,
properties=cli_properties,
)

Expand All @@ -109,7 +141,7 @@ def before_command_run(
generic_properties["main_command"] = main_command
_send_heap_event(
event_name="CLI command",
identity=hashed_username,
identity=user_uuid,
properties=generic_properties,
)
except Exception as exc:
Expand Down Expand Up @@ -141,16 +173,16 @@ def after_catalog_created(self, catalog):
logger.debug("You have opted into product usage analytics.")

default_pipeline = pipelines.get("__default__") # __default__
hashed_username = _get_hashed_username()
user_uuid = _get_or_create_uuid()

project_properties = _get_project_properties(hashed_username, self.project_path)
project_properties = _get_project_properties(user_uuid, self.project_path)

project_statistics_properties = _format_project_statistics_data(
project_properties, catalog, default_pipeline, pipelines
)
_send_heap_event(
event_name="Kedro Project Statistics",
identity=hashed_username,
identity=user_uuid,
properties=project_statistics_properties,
)

Expand All @@ -163,10 +195,10 @@ def _is_known_ci_env(known_ci_env_var_keys=KNOWN_CI_ENV_VAR_KEYS):
return any(os.getenv(key) for key in known_ci_env_var_keys)


def _get_project_properties(hashed_username: str, project_path: str) -> Dict:
def _get_project_properties(user_uuid: str, project_path: str) -> Dict:
hashed_package_name = _hash(PACKAGE_NAME) if PACKAGE_NAME else "undefined"
properties = {
"username": hashed_username,
"username": user_uuid,
"package_name": hashed_package_name,
"project_version": KEDRO_VERSION,
"telemetry_version": TELEMETRY_VERSION,
Expand Down
1 change: 1 addition & 0 deletions kedro-telemetry/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ license = {text = "Apache Software License (Apache 2.0)"}
dependencies = [
"kedro>=0.18.0",
"requests~=2.20",
"appdirs>=1.4.4",
]
dynamic = ["readme", "version"]

Expand Down
56 changes: 30 additions & 26 deletions kedro-telemetry/tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,16 +131,16 @@ def test_before_command_run(self, mocker, fake_metadata):
mocked_anon_id.return_value = "digested"
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_hashed_username",
return_value="hashed_username",
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="user_uuid",
)

mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
telemetry_hook = KedroTelemetryCLIHooks()
command_args = ["--version"]
telemetry_hook.before_command_run(fake_metadata, command_args)
expected_properties = {
"username": "hashed_username",
"username": "user_uuid",
"package_name": "digested",
"project_version": kedro_version,
"telemetry_version": TELEMETRY_VERSION,
Expand All @@ -157,12 +157,12 @@ def test_before_command_run(self, mocker, fake_metadata):
expected_calls = [
mocker.call(
event_name="Command run: --version",
identity="hashed_username",
identity="user_uuid",
properties=expected_properties,
),
mocker.call(
event_name="CLI command",
identity="hashed_username",
identity="user_uuid",
properties=generic_properties,
),
]
Expand All @@ -177,8 +177,8 @@ def test_before_command_run_with_tools(self, mocker, fake_metadata):
mocked_anon_id.return_value = "digested"
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_hashed_username",
return_value="hashed_username",
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="user_uuid",
)

mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
Expand All @@ -188,7 +188,7 @@ def test_before_command_run_with_tools(self, mocker, fake_metadata):
command_args = ["--version"]
telemetry_hook.before_command_run(fake_metadata, command_args)
expected_properties = {
"username": "hashed_username",
"username": "user_uuid",
"package_name": "digested",
"project_version": kedro_version,
"telemetry_version": TELEMETRY_VERSION,
Expand All @@ -207,12 +207,12 @@ def test_before_command_run_with_tools(self, mocker, fake_metadata):
expected_calls = [
mocker.call(
event_name="Command run: --version",
identity="hashed_username",
identity="user_uuid",
properties=expected_properties,
),
mocker.call(
event_name="CLI command",
identity="hashed_username",
identity="user_uuid",
properties=generic_properties,
),
]
Expand All @@ -226,13 +226,17 @@ def test_before_command_run_empty_args(self, mocker, fake_metadata):
mocked_anon_id = mocker.patch("kedro_telemetry.plugin._hash")
mocked_anon_id.return_value = "digested"
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="user_uuid",
)

mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
telemetry_hook = KedroTelemetryCLIHooks()
command_args = []
telemetry_hook.before_command_run(fake_metadata, command_args)
expected_properties = {
"username": "digested",
"username": "user_uuid",
"package_name": "digested",
"project_version": kedro_version,
"telemetry_version": TELEMETRY_VERSION,
Expand All @@ -249,12 +253,12 @@ def test_before_command_run_empty_args(self, mocker, fake_metadata):
expected_calls = [
mocker.call(
event_name="Command run: kedro",
identity="digested",
identity="user_uuid",
properties=expected_properties,
),
mocker.call(
event_name="CLI command",
identity="digested",
identity="user_uuid",
properties=generic_properties,
),
]
Expand Down Expand Up @@ -296,7 +300,7 @@ def test_before_command_run_anonymous(self, mocker, fake_metadata):
mocked_anon_id = mocker.patch("kedro_telemetry.plugin._hash")
mocked_anon_id.return_value = "digested"
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch("getpass.getuser", side_effect=Exception)
mocker.patch("builtins.open", side_effect=Exception)

mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
telemetry_hook = KedroTelemetryCLIHooks()
Expand Down Expand Up @@ -474,8 +478,8 @@ def test_after_context_created_without_kedro_run( # noqa: PLR0913
mocker.patch("kedro_telemetry.plugin._hash", return_value="digested")
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_hashed_username",
return_value="hashed_username",
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="user_uuid",
)
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
mocker.patch("kedro_telemetry.plugin.open")
Expand All @@ -487,7 +491,7 @@ def test_after_context_created_without_kedro_run( # noqa: PLR0913
telemetry_hook.after_catalog_created(fake_catalog)

project_properties = {
"username": "hashed_username",
"username": "user_uuid",
"package_name": "digested",
"project_version": kedro_version,
"telemetry_version": TELEMETRY_VERSION,
Expand All @@ -504,7 +508,7 @@ def test_after_context_created_without_kedro_run( # noqa: PLR0913

expected_call = mocker.call(
event_name="Kedro Project Statistics",
identity="hashed_username",
identity="user_uuid",
properties=expected_properties,
)

Expand All @@ -530,8 +534,8 @@ def test_after_context_created_with_kedro_run( # noqa: PLR0913
mocker.patch("kedro_telemetry.plugin._hash", return_value="digested")
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_hashed_username",
return_value="hashed_username",
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="user_uuid",
)
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
mocker.patch("kedro_telemetry.plugin.toml.load")
Expand All @@ -546,7 +550,7 @@ def test_after_context_created_with_kedro_run( # noqa: PLR0913
telemetry_hook.after_catalog_created(fake_catalog)

project_properties = {
"username": "hashed_username",
"username": "user_uuid",
"package_name": "digested",
"project_version": kedro_version,
"telemetry_version": TELEMETRY_VERSION,
Expand All @@ -563,7 +567,7 @@ def test_after_context_created_with_kedro_run( # noqa: PLR0913

expected_call = mocker.call(
event_name="Kedro Project Statistics",
identity="hashed_username",
identity="user_uuid",
properties=expected_properties,
)

Expand All @@ -589,8 +593,8 @@ def test_after_context_created_with_kedro_run_and_tools( # noqa: PLR0913
mocker.patch("kedro_telemetry.plugin._hash", return_value="digested")
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_hashed_username",
return_value="hashed_username",
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="user_uuid",
)
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
mocker.patch("builtins.open", mocker.mock_open(read_data=MOCK_PYPROJECT_TOOLS))
Expand All @@ -607,7 +611,7 @@ def test_after_context_created_with_kedro_run_and_tools( # noqa: PLR0913
telemetry_hook.after_catalog_created(fake_catalog)

project_properties = {
"username": "hashed_username",
"username": "user_uuid",
"package_name": "digested",
"project_version": kedro_version,
"telemetry_version": TELEMETRY_VERSION,
Expand All @@ -626,7 +630,7 @@ def test_after_context_created_with_kedro_run_and_tools( # noqa: PLR0913

expected_call = mocker.call(
event_name="Kedro Project Statistics",
identity="hashed_username",
identity="user_uuid",
properties=expected_properties,
)

Expand Down

0 comments on commit 846db2b

Please sign in to comment.