Skip to content

Commit

Permalink
Send anonymous telemetry to bugout (#11697)
Browse files Browse the repository at this point in the history
Uses the `humbug` library to send anonymized telemetry data
to bugout.

Repos must opt in to telemetry. We log a warning if a repo has
not opted in or out.

See the code and also https://www.pantsbuild.org/v2.4/docs/anonymous-telemetry
for more info on the data we send and how we preserve anonymity
and prevent leakage of proprietary information.

[ci skip-rust]

[ci skip-build-wheels]
  • Loading branch information
benjyw committed Mar 19, 2021
1 parent 9deb9ef commit ddd91e2
Show file tree
Hide file tree
Showing 9 changed files with 425 additions and 7 deletions.
9 changes: 6 additions & 3 deletions 3rdparty/python/constraints.txt
@@ -1,25 +1,28 @@
# Generated by build-support/bin/generate_lockfile.sh on Tue Mar 16 06:41:23 PM PDT 2021
# Generated by build-support/bin/generate_lockfile.sh on Wed Mar 17 11:10:48 PDT 2021
ansicolors==1.1.8
attrs==20.3.0
beautifulsoup4==4.6.3
bugout==0.1.8
certifi==2020.12.5
cffi==1.14.5
chardet==4.0.0
cryptography==3.4.6
fasteners==0.15
freezegun==1.0.0
humbug==0.1.9
idna==2.10
iniconfig==1.1.1
monotonic==1.5
mypy==0.800
mypy-extensions==0.4.3
packaging==20.9
pex==2.1.34
pip==20.2.3
pip==20.2.4
pluggy==0.13.1
psutil==5.7.0
py==1.10.0
pycparser==2.20
pydantic==1.8.1
pyOpenSSL==20.0.1
pyparsing==2.4.7
pystache==0.5.4
Expand All @@ -32,5 +35,5 @@ setuptools==53.1.0
six==1.15.0
toml==0.10.2
typed-ast==1.4.2
typing-extensions==3.7.4.2
typing-extensions==3.7.4.3
urllib3==1.26.4
6 changes: 5 additions & 1 deletion 3rdparty/python/requirements.txt
Expand Up @@ -3,6 +3,10 @@ beautifulsoup4>=4.6.0,<4.7
fasteners==0.15.0
freezegun==1.0.0

# Note: we use humbug to report telemetry. When upgrading, ensure the new version maintains the
# anonymity promise we make here: https://www.pantsbuild.org/docs/anonymous-telemetry
humbug==0.1.9

# The MyPy requirement should be maintained in lockstep with the requirement the Pants repo uses
# for the mypy task since it configures custom MyPy plugins. That requirement can be found via:
# ./pants help-all | \
Expand All @@ -25,4 +29,4 @@ requests[security]>=2.20.1
setproctitle==1.2
setuptools>=50.3.0,<54.0
toml==0.10.2
typing-extensions==3.7.4.2
typing-extensions==3.7.4.3
4 changes: 4 additions & 0 deletions pants.toml
Expand Up @@ -53,6 +53,10 @@ pants_ignore.add = [
"!/pants.pex",
]

[anonymous-telemetry]
enabled = true
repo_id = "7775F8D5-FC58-4DBC-9302-D00AE4A1505F"

[source]
root_patterns = [
"src/*",
Expand Down
3 changes: 2 additions & 1 deletion src/python/pants/core/register.py
Expand Up @@ -19,7 +19,7 @@
stripped_source_files,
subprocess_environment,
)
from pants.goal import stats_aggregator
from pants.goal import anonymous_telemetry, stats_aggregator
from pants.source import source_root


Expand All @@ -45,6 +45,7 @@ def rules():
*subprocess_environment.rules(),
*source_root.rules(),
*target_type_rules(),
*anonymous_telemetry.rules(),
*stats_aggregator.rules(),
]

Expand Down
192 changes: 192 additions & 0 deletions src/python/pants/goal/anonymous_telemetry.py
@@ -0,0 +1,192 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import json
import logging
import re
import uuid
from typing import cast

from humbug.consent import HumbugConsent # type: ignore
from humbug.report import Modes, Report, Reporter # type: ignore

from pants.engine.internals.scheduler import Workunit
from pants.engine.rules import collect_rules, rule
from pants.engine.streaming_workunit_handler import (
StreamingWorkunitContext,
WorkunitsCallback,
WorkunitsCallbackFactory,
WorkunitsCallbackFactoryRequest,
)
from pants.engine.unions import UnionRule
from pants.option.subsystem import Subsystem
from pants.util.docutil import docs_url

logger = logging.getLogger(__name__)


# Access token and journal id handed to the humbug Reporter when a telemetry report is published.
_bugout_access_token = "3ae76900-9a68-4a87-a127-7c9f179d7272"
_bugout_journal_id = "801e9b3c-6b03-40a7-870f-5b25d326da66"
# URL of the telemetry documentation, and the sentence that option help text and log messages
# append to point users at it.
_telemetry_docs_url = docs_url("anonymous-telemetry")
_telemetry_docs_referral = f"See {_telemetry_docs_url} for details"


class AnonymousTelemetry(Subsystem):
    """Subsystem holding the `[anonymous-telemetry]` options: the opt-in flag and the repo id."""

    options_scope = "anonymous-telemetry"
    help = "Options related to sending anonymous stats to the Pants project, to aid development."

    @classmethod
    def register_options(cls, register):
        """Register the advanced `--enabled` and `--repo-id` options."""
        enabled_help = (
            "Whether to send anonymous telemetry to the Pants project.\nTelemetry is sent "
            "asynchronously, with silent failure, and does not impact build times or "
            f"outcomes.\n{_telemetry_docs_referral}."
        )
        repo_id_help = (
            "An anonymized ID representing this repo.\nFor private repos, you likely want the "
            "ID to not be derived from, or algorithmically convertible to, anything "
            "identifying the repo.\nFor public repos the ID may be visible in that repo's "
            "config file, so anonymity of the repo is not guaranteed (although user anonymity "
            f"is always guaranteed).\n{_telemetry_docs_referral}."
        )

        # Telemetry is off unless explicitly enabled.
        register("--enabled", advanced=True, type=bool, default=False, help=enabled_help)
        # There is no default repo id; the user must supply one.
        register("--repo-id", advanced=True, type=str, default=None, help=repo_id_help)

    @property
    def enabled(self) -> bool:
        """The value of the `enabled` option."""
        is_enabled = self.options.enabled
        return cast(bool, is_enabled)

    @property
    def repo_id(self) -> str | None:
        """The value of the `repo_id` option, or None when it has not been set."""
        configured_id = self.options.repo_id
        return cast("str | None", configured_id)


class AnonymousTelemetryCallback(WorkunitsCallback):
    """Workunits callback that publishes one anonymous telemetry report when a run finishes.

    A report is only sent when the `[anonymous-telemetry]` subsystem is enabled and carries a
    valid `repo_id`. In every other case the reason is logged and nothing is sent.
    """

    def __init__(self, anonymous_telemetry: AnonymousTelemetry) -> None:
        """Store the subsystem whose options decide whether and how telemetry is sent."""
        super().__init__()
        self._anonymous_telemetry = anonymous_telemetry

    @property
    def can_finish_async(self) -> bool:
        """Report that this callback may finish in the background."""
        # NOTE(review): the previous comment justified this with "we don't log anything", but
        # __call__ logs a warning/error when telemetry is unconfigured or misconfigured.
        # Confirm that finishing in the background is still the intended behavior.
        return True

    @staticmethod
    def validate_repo_id(repo_id: str) -> bool:
        """Return True if `repo_id` is 30-60 characters of ASCII alphanumerics, `-` or `_`.

        Logs an error describing the requirement when the id is rejected.
        """
        # `fullmatch` is used instead of `match` with `^...$`, because `$` also matches just
        # before a trailing newline and would therefore accept ids such as "aaaa...a\n".
        is_valid = re.fullmatch(r"[a-zA-Z0-9_-]{30,60}", repo_id) is not None
        if not is_valid:
            logger.error(
                "The repo_id must be between 30 and 60 characters long, and consist of only "
                "alphanumeric characters, dashes and underscores."
            )
        return is_valid

    def __call__(
        self,
        *,
        started_workunits: tuple[Workunit, ...],
        completed_workunits: tuple[Workunit, ...],
        finished: bool,
        context: StreamingWorkunitContext,
    ) -> None:
        """Publish the telemetry report for the run once it has finished.

        Does nothing until `finished` is True. After that it warns if `enabled` was never set
        explicitly, logs an error if telemetry is enabled but `repo_id` is missing or invalid,
        and otherwise builds the report from `context.run_tracker` and publishes it
        synchronously via humbug.
        """
        if not finished:
            return

        telemetry = self._anonymous_telemetry

        if telemetry.options.is_default("enabled"):
            logger.warning(
                f"Please either set `enabled = true` in the [anonymous-telemetry] section of "
                f"pants.toml to enable sending anonymous stats to the Pants project to aid "
                f"development, or set `enabled = false` to disable it. No telemetry sent "
                f"for this run. An explicit setting will get rid of this message. "
                f"{_telemetry_docs_referral}."
            )

        if not telemetry.enabled:
            return

        repo_id = telemetry.repo_id
        if repo_id is None:
            logger.error(
                f'Please set `repo_id = "<uuid>"` in the [anonymous-telemetry] section '
                f"of pants.toml, where `<uuid>` is some fixed random identifier, such as "
                f"one generated by uuidgen. No telemetry sent for this run. "
                f"{_telemetry_docs_referral}."
            )
            return

        # validate_repo_id logs the reason itself when it rejects the id.
        if not self.validate_repo_id(repo_id):
            return

        # Assemble and send the telemetry.
        # Note that this method is called with finished=True only after the
        # StreamingWorkunitHandler context ends, i.e., after end_run() has been called,
        # so the RunTracker will have had a chance to finalize its state.
        telemetry_data = context.run_tracker.get_anonymous_telemetry_data(repo_id)
        # TODO: Add information about any errors that occurred.

        reporter = Reporter(
            name="pantsbuild/pants",
            # We've already established consent at this point.
            consent=HumbugConsent(True),
            # Fall back to a random session id if the run data carries no run_id.
            session_id=telemetry_data.get("run_id", str(uuid.uuid4())),
            bugout_token=_bugout_access_token,
            bugout_journal_id=_bugout_journal_id,
            timeout_seconds=5,
            # We don't want to spawn a thread in the engine, and we're
            # already running in a background thread in pantsd.
            mode=Modes.SYNCHRONOUS,
        )

        # This is copied from humbug code, to ensure that future changes to humbug
        # don't add tags that inadvertently violate our anonymity promise.
        system_info = reporter.system_information
        system_tags = [
            "humbug",
            f"source:{reporter.name}",
            f"os:{system_info.os}",
            f"arch:{system_info.machine}",
            f"python:{system_info.python_version_major}",
            f"python:{system_info.python_version_major}.{system_info.python_version_minor}",
            f"python:{system_info.python_version}",
            f"session:{reporter.session_id}",
        ]
        tags = [
            *system_tags,
            f"pants_version:{telemetry_data.get('pants_version')}",
            *(f"goal:{goal}" for goal in telemetry_data.get("goals", [])),
        ]

        report = Report(
            title=f"pants run {reporter.session_id}",
            tags=tags,
            # sort_keys keeps the serialized content stable for identical data.
            content=json.dumps(telemetry_data, sort_keys=True),
        )
        reporter.publish(report)


class AnonymousTelemetryCallbackFactoryRequest:
    """Distinct request type; installing it triggers construction of the WorkunitsCallback."""


@rule
def construct_callback(
    _: AnonymousTelemetryCallbackFactoryRequest, anonymous_telemetry: AnonymousTelemetry
) -> WorkunitsCallbackFactory:
    """Wrap creation of an AnonymousTelemetryCallback in a WorkunitsCallbackFactory.

    The callback itself is only built when the factory's zero-argument callable is invoked.
    """

    def make_callback() -> AnonymousTelemetryCallback:
        return AnonymousTelemetryCallback(anonymous_telemetry)

    return WorkunitsCallbackFactory(make_callback)


def rules():
    """Return the union registration for the telemetry callback request, then this module's rules."""
    union_registration = UnionRule(
        WorkunitsCallbackFactoryRequest, AnonymousTelemetryCallbackFactoryRequest
    )
    # collect_rules() is called directly in this function body, as in the original.
    collected = collect_rules()
    return [union_registration, *collected]
60 changes: 60 additions & 0 deletions src/python/pants/goal/anonymous_telemetry_integration_test.py
@@ -0,0 +1,60 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

from pants.testutil.pants_integration_test import run_pants

# Substrings of the messages logged by pants.goal.anonymous_telemetry; each test checks for
# their presence or absence in the stderr of a Pants run.
_no_explicit_setting_msg = "An explicit setting will get rid of this message"
_no_repo_id_msg = 'set `repo_id = "<uuid>"` in the [anonymous-telemetry] section of pants.toml'
_bad_repo_id_msg = "The repo_id must be between 30 and 60 characters long"


def test_warn_if_no_explicit_setting() -> None:
    """With an empty config, only the "explicit setting" warning is emitted."""
    pants_run = run_pants(["roots"], config={}, use_pantsd=False)
    pants_run.assert_success()

    stderr = pants_run.stderr
    assert _no_explicit_setting_msg in stderr
    for unexpected in (_no_repo_id_msg, _bad_repo_id_msg):
        assert unexpected not in stderr


def test_warn_if_repo_id_unset() -> None:
    """Enabling telemetry without a repo_id emits only the "set repo_id" message."""
    telemetry_config = {"anonymous-telemetry": {"enabled": True}}
    pants_run = run_pants(["roots"], config=telemetry_config, use_pantsd=False)
    pants_run.assert_success()

    stderr = pants_run.stderr
    assert _no_explicit_setting_msg not in stderr
    assert _no_repo_id_msg in stderr
    assert _bad_repo_id_msg not in stderr


def test_warn_if_repo_id_invalid() -> None:
    """Enabling telemetry with a too-short repo_id emits only the "bad repo_id" message."""
    telemetry_config = {"anonymous-telemetry": {"enabled": True, "repo_id": "tooshort"}}
    pants_run = run_pants(["roots"], config=telemetry_config, use_pantsd=False)
    pants_run.assert_success()

    stderr = pants_run.stderr
    for unexpected in (_no_explicit_setting_msg, _no_repo_id_msg):
        assert unexpected not in stderr
    assert _bad_repo_id_msg in stderr


def test_no_warn_if_explicitly_on() -> None:
    """Enabling telemetry with a 36-character repo_id emits none of the three messages."""
    telemetry_config = {"anonymous-telemetry": {"enabled": True, "repo_id": 36 * "a"}}
    pants_run = run_pants(["roots"], config=telemetry_config, use_pantsd=False)
    pants_run.assert_success()

    stderr = pants_run.stderr
    for unexpected in (_no_explicit_setting_msg, _no_repo_id_msg, _bad_repo_id_msg):
        assert unexpected not in stderr


def test_no_warn_if_explicitly_off() -> None:
    """Explicitly disabling telemetry emits none of the three messages."""
    telemetry_config = {"anonymous-telemetry": {"enabled": False}}
    pants_run = run_pants(["roots"], config=telemetry_config)
    pants_run.assert_success()

    stderr = pants_run.stderr
    for unexpected in (_no_explicit_setting_msg, _no_repo_id_msg, _bad_repo_id_msg):
        assert unexpected not in stderr
35 changes: 35 additions & 0 deletions src/python/pants/goal/anonymous_telemetry_test.py
@@ -0,0 +1,35 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import pytest

from pants.goal.anonymous_telemetry import AnonymousTelemetryCallback


@pytest.mark.parametrize(
    "repo_id",
    (
        30 * "a",
        31 * "2",
        60 * "C",
        "c1db8737-06b4-4aa8-b18f-8cde023eb524",
        "D2E39BA4_BA82_4A85_99DC_9E99E4528D3F",
    ),
)
def test_valid_repo_ids(repo_id) -> None:
    """Ids of 30-60 alphanumeric, dash or underscore characters are accepted."""
    accepted = AnonymousTelemetryCallback.validate_repo_id(repo_id)
    assert accepted


@pytest.mark.parametrize(
    "repo_id",
    (
        "",
        "x",
        29 * "a",
        61 * "2",
        "@c1db8737-06b4-4aa8-b18f-8cde023eb524",
        "D2E39BA4-BA82-4A85-99DC-9Eá9E4528D3F",
    ),
)
def test_invalid_repo_ids(repo_id) -> None:
    """Ids that are too short, too long, or contain other characters are rejected."""
    accepted = AnonymousTelemetryCallback.validate_repo_id(repo_id)
    assert not accepted

0 comments on commit ddd91e2

Please sign in to comment.