Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(telemetry): Unique User IDs in kedro-telemetry - merge only for kedro-telemetry release 0.4.0 #596

Merged
merged 18 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions kedro-telemetry/RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Upcoming release
* Updated the plugin to generate an unique UUID for each user of `kedro-telemetry`.
astrojuanlu marked this conversation as resolved.
Show resolved Hide resolved

# Release 0.3.2
* Updated plugin to share if a project is being run in a ci environment.
Expand Down
48 changes: 37 additions & 11 deletions kedro-telemetry/kedro_telemetry/plugin.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Kedro Telemetry plugin for collecting Kedro usage data."""

import getpass
import hashlib
import json
import logging
import os
import sys
import uuid
from configparser import ConfigParser
from copy import deepcopy
from datetime import datetime
from pathlib import Path
Expand All @@ -15,6 +16,7 @@
import requests
import toml
import yaml
from appdirs import user_config_dir
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this a new dependency?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this a new dependency?

yes, we added it to the dependencies list

from kedro import __version__ as KEDRO_VERSION
from kedro.framework.cli.cli import KedroCLI
from kedro.framework.cli.hooks import cli_hook_impl
Expand All @@ -41,6 +43,7 @@
"BUILDKITE", # https://buildkite.com/docs/pipelines/environment-variables
}
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
CONFIG_FILENAME = "telemetry.conf"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it expected to be commited into the repo?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it expected to be commited into the repo?

You mean this file? It will be stored in the user's configuration directory, not in project directory, so looks like it will not be possible to commit it.


logger = logging.getLogger(__name__)

Expand All @@ -49,15 +52,38 @@ def _hash(string: str) -> str:
return hashlib.sha512(bytes(string, encoding="utf8")).hexdigest()


def _get_hashed_username():
def _get_or_create_uuid():
"""
Reads a UUID from a configuration file or generates and saves a new one if not present.
"""
config_path = user_config_dir("kedro")
astrojuanlu marked this conversation as resolved.
Show resolved Hide resolved
full_path = os.path.join(config_path, CONFIG_FILENAME)
config = ConfigParser()
astrojuanlu marked this conversation as resolved.
Show resolved Hide resolved

try:
username = getpass.getuser()
return _hash(username)
except Exception as exc:
logger.warning(
"Something went wrong with getting the username. Exception: %s",
exc,
)
if os.path.exists(full_path):
config.read(full_path)

if config.has_section("telemetry") and "uuid" in config["telemetry"]:
try:
return uuid.UUID(config["telemetry"]["uuid"]).hex
except ValueError:
pass # Invalid UUID found, will generate a new one
astrojuanlu marked this conversation as resolved.
Show resolved Hide resolved

# Generate a new UUID and save it to the config file
if not config.has_section("telemetry"):
config.add_section("telemetry")
new_uuid = uuid.uuid4().hex
config.set("telemetry", "uuid", new_uuid)

os.makedirs(config_path, exist_ok=True)
with open(full_path, "w") as configfile:
config.write(configfile)

return new_uuid

except Exception as e:
logging.error(f"Failed to get or create UUID: {e}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be splitted into two function? doing both read and write in a function is a bit too much and hard to test.

For example, how are you testing for the case when config exist but telemetry section does not exist?

return ""


Expand Down Expand Up @@ -90,7 +116,7 @@ def before_command_run(
main_command = masked_command_args[0] if masked_command_args else "kedro"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't comment on the line above since it hasn't been changed but could line 117 be changed to -
cli = KedroCLI(project_path=metadata.project_path)
Following the changes in kedro-org/kedro#3683, I think this will allow us to capture commands called from within subdirectories also and mask them properly


logger.debug("You have opted into product usage analytics.")
hashed_username = _get_hashed_username()
hashed_username = _get_or_create_uuid()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the variable be renamed to user_uuid or something like that now it's not a hashed username anymore?

project_properties = _get_project_properties(
hashed_username, project_metadata.project_path
)
Expand Down Expand Up @@ -141,7 +167,7 @@ def after_catalog_created(self, catalog):
logger.debug("You have opted into product usage analytics.")

default_pipeline = pipelines.get("__default__") # __default__
hashed_username = _get_hashed_username()
hashed_username = _get_or_create_uuid()

project_properties = _get_project_properties(hashed_username, self.project_path)

Expand Down
1 change: 1 addition & 0 deletions kedro-telemetry/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ license = {text = "Apache Software License (Apache 2.0)"}
dependencies = [
"kedro>=0.18.0",
"requests~=2.20",
"appdirs>=1.4.4",
]
dynamic = ["readme", "version"]

Expand Down
22 changes: 13 additions & 9 deletions kedro-telemetry/tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_before_command_run(self, mocker, fake_metadata):
mocked_anon_id.return_value = "digested"
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_hashed_username",
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="hashed_username",
)

Expand Down Expand Up @@ -177,7 +177,7 @@ def test_before_command_run_with_tools(self, mocker, fake_metadata):
mocked_anon_id.return_value = "digested"
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_hashed_username",
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="hashed_username",
)

Expand Down Expand Up @@ -226,13 +226,17 @@ def test_before_command_run_empty_args(self, mocker, fake_metadata):
mocked_anon_id = mocker.patch("kedro_telemetry.plugin._hash")
mocked_anon_id.return_value = "digested"
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="hashed_username",
)

mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
telemetry_hook = KedroTelemetryCLIHooks()
command_args = []
telemetry_hook.before_command_run(fake_metadata, command_args)
expected_properties = {
"username": "digested",
"username": "hashed_username",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT: This is the uuid now right? I know it's just test data, but maybe for clarity it would be helpful to specify the actual data so uuid in this case.

"package_name": "digested",
"project_version": kedro_version,
"telemetry_version": TELEMETRY_VERSION,
Expand All @@ -249,12 +253,12 @@ def test_before_command_run_empty_args(self, mocker, fake_metadata):
expected_calls = [
mocker.call(
event_name="Command run: kedro",
identity="digested",
identity="hashed_username",
properties=expected_properties,
),
mocker.call(
event_name="CLI command",
identity="digested",
identity="hashed_username",
properties=generic_properties,
),
]
Expand Down Expand Up @@ -296,7 +300,7 @@ def test_before_command_run_anonymous(self, mocker, fake_metadata):
mocked_anon_id = mocker.patch("kedro_telemetry.plugin._hash")
mocked_anon_id.return_value = "digested"
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch("getpass.getuser", side_effect=Exception)
mocker.patch("builtins.open", side_effect=Exception)

mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
telemetry_hook = KedroTelemetryCLIHooks()
Expand Down Expand Up @@ -474,7 +478,7 @@ def test_after_context_created_without_kedro_run( # noqa: PLR0913
mocker.patch("kedro_telemetry.plugin._hash", return_value="digested")
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_hashed_username",
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="hashed_username",
)
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
Expand Down Expand Up @@ -530,7 +534,7 @@ def test_after_context_created_with_kedro_run( # noqa: PLR0913
mocker.patch("kedro_telemetry.plugin._hash", return_value="digested")
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_hashed_username",
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="hashed_username",
)
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
Expand Down Expand Up @@ -589,7 +593,7 @@ def test_after_context_created_with_kedro_run_and_tools( # noqa: PLR0913
mocker.patch("kedro_telemetry.plugin._hash", return_value="digested")
mocker.patch("kedro_telemetry.plugin.PACKAGE_NAME", "spaceflights")
mocker.patch(
"kedro_telemetry.plugin._get_hashed_username",
"kedro_telemetry.plugin._get_or_create_uuid",
return_value="hashed_username",
)
mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
Expand Down