Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Send anonymous telemetry to bugout #11697

Merged
merged 5 commits into from Mar 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 6 additions & 3 deletions 3rdparty/python/constraints.txt
@@ -1,25 +1,28 @@
# Generated by build-support/bin/generate_lockfile.sh on Tue Mar 16 06:41:23 PM PDT 2021
# Generated by build-support/bin/generate_lockfile.sh on Wed Mar 17 11:10:48 PDT 2021
ansicolors==1.1.8
attrs==20.3.0
beautifulsoup4==4.6.3
bugout==0.1.8
certifi==2020.12.5
cffi==1.14.5
chardet==4.0.0
cryptography==3.4.6
fasteners==0.15
freezegun==1.0.0
humbug==0.1.9
idna==2.10
iniconfig==1.1.1
monotonic==1.5
mypy==0.800
mypy-extensions==0.4.3
packaging==20.9
pex==2.1.34
pip==20.2.3
pip==20.2.4
pluggy==0.13.1
psutil==5.7.0
py==1.10.0
pycparser==2.20
pydantic==1.8.1
pyOpenSSL==20.0.1
pyparsing==2.4.7
pystache==0.5.4
Expand All @@ -32,5 +35,5 @@ setuptools==53.1.0
six==1.15.0
toml==0.10.2
typed-ast==1.4.2
typing-extensions==3.7.4.2
typing-extensions==3.7.4.3
urllib3==1.26.4
6 changes: 5 additions & 1 deletion 3rdparty/python/requirements.txt
Expand Up @@ -3,6 +3,10 @@ beautifulsoup4>=4.6.0,<4.7
fasteners==0.15.0
freezegun==1.0.0

# Note: we use humbug to report telemetry. When upgrading, ensure the new version maintains the
# anonymity promise we make here: https://www.pantsbuild.org/docs/anonymous-telemetry
benjyw marked this conversation as resolved.
Show resolved Hide resolved
benjyw marked this conversation as resolved.
Show resolved Hide resolved
humbug==0.1.9

# The MyPy requirement should be maintained in lockstep with the requirement the Pants repo uses
# for the mypy task since it configures custom MyPy plugins. That requirement can be found via:
# ./pants help-all | \
Expand All @@ -25,4 +29,4 @@ requests[security]>=2.20.1
setproctitle==1.2
setuptools>=50.3.0,<54.0
toml==0.10.2
typing-extensions==3.7.4.2
typing-extensions==3.7.4.3
4 changes: 4 additions & 0 deletions pants.toml
Expand Up @@ -53,6 +53,10 @@ pants_ignore.add = [
"!/pants.pex",
]

[anonymous-telemetry]
enabled = true
repo_id = "7775F8D5-FC58-4DBC-9302-D00AE4A1505F"

[source]
root_patterns = [
"src/*",
Expand Down
3 changes: 2 additions & 1 deletion src/python/pants/core/register.py
Expand Up @@ -19,7 +19,7 @@
stripped_source_files,
subprocess_environment,
)
from pants.goal import stats_aggregator
from pants.goal import anonymous_telemetry, stats_aggregator
from pants.source import source_root


Expand All @@ -45,6 +45,7 @@ def rules():
*subprocess_environment.rules(),
*source_root.rules(),
*target_type_rules(),
*anonymous_telemetry.rules(),
*stats_aggregator.rules(),
]

Expand Down
192 changes: 192 additions & 0 deletions src/python/pants/goal/anonymous_telemetry.py
@@ -0,0 +1,192 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import json
import logging
import re
import uuid
from typing import cast

from humbug.consent import HumbugConsent # type: ignore
from humbug.report import Modes, Report, Reporter # type: ignore

from pants.engine.internals.scheduler import Workunit
from pants.engine.rules import collect_rules, rule
from pants.engine.streaming_workunit_handler import (
StreamingWorkunitContext,
WorkunitsCallback,
WorkunitsCallbackFactory,
WorkunitsCallbackFactoryRequest,
)
from pants.engine.unions import UnionRule
from pants.option.subsystem import Subsystem
from pants.util.docutil import docs_url

logger = logging.getLogger(__name__)


# Token handed to the humbug Reporter as `bugout_token` when a telemetry report is published.
_bugout_access_token = "3ae76900-9a68-4a87-a127-7c9f179d7272"
benjyw marked this conversation as resolved.
Show resolved Hide resolved
# Journal ID handed to the humbug Reporter as `bugout_journal_id`.
_bugout_journal_id = "801e9b3c-6b03-40a7-870f-5b25d326da66"
# URL of the "anonymous-telemetry" docs page, and the sentence that points readers at it; the
# sentence is appended to option help text and to log messages in this module.
_telemetry_docs_url = docs_url("anonymous-telemetry")
_telemetry_docs_referral = f"See {_telemetry_docs_url} for details"


class AnonymousTelemetry(Subsystem):
    """Subsystem holding the `[anonymous-telemetry]` options: `enabled` and `repo_id`."""

    options_scope = "anonymous-telemetry"
    help = "Options related to sending anonymous stats to the Pants project, to aid development."

    @classmethod
    def register_options(cls, register):
        """Register the advanced `--enabled` and `--repo-id` options."""
        enabled_help = (
            "Whether to send anonymous telemetry to the Pants project.\nTelemetry is sent "
            "asynchronously, with silent failure, and does not impact build times or "
            f"outcomes.\n{_telemetry_docs_referral}."
        )
        repo_id_help = (
            "An anonymized ID representing this repo.\nFor private repos, you likely want the "
            "ID to not be derived from, or algorithmically convertible to, anything "
            "identifying the repo.\nFor public repos the ID may be visible in that repo's "
            "config file, so anonymity of the repo is not guaranteed (although user anonymity "
            f"is always guaranteed).\n{_telemetry_docs_referral}."
        )

        # Telemetry is off unless explicitly enabled.
        register("--enabled", advanced=True, type=bool, default=False, help=enabled_help)
        # No default repo ID: the user has to choose one.
        register("--repo-id", advanced=True, type=str, default=None, help=repo_id_help)

    @property
    def enabled(self) -> bool:
        """The value of the `--enabled` option."""
        value = self.options.enabled
        return cast(bool, value)

    @property
    def repo_id(self) -> str | None:
        """The value of the `--repo-id` option, or None when it is not set."""
        value = self.options.repo_id
        return cast("str | None", value)


class AnonymousTelemetryCallback(WorkunitsCallback):
    """Publishes a single anonymous telemetry report once a Pants run has finished.

    A report is only sent when the `[anonymous-telemetry]` subsystem is enabled and carries a
    well-formed `repo_id`; otherwise a warning or error explaining what to configure is logged.
    """

    def __init__(self, anonymous_telemetry: AnonymousTelemetry) -> None:
        super().__init__()
        self._anonymous_telemetry = anonymous_telemetry

    @property
    def can_finish_async(self) -> bool:
        """Report that this callback may finish in the background."""
        # NOTE(review): the original rationale here was "we don't log anything", but __call__
        # and validate_repo_id do log warnings/errors for missing or invalid configuration.
        # Confirm those messages still reach the user when the callback finishes asynchronously.
        return True

    @staticmethod
    def validate_repo_id(repo_id: str) -> bool:
        """Return whether `repo_id` is well-formed, logging an error if it is not.

        A well-formed ID is 30 to 60 characters long and made up only of ASCII letters, digits,
        dashes and underscores. The whole string must match: `fullmatch` is used instead of
        `match` with a `$` anchor, because `$` also matches just before a trailing newline and
        would therefore accept an otherwise valid ID that ends in a newline character.
        """
        is_valid = re.fullmatch(r"[a-zA-Z0-9_-]{30,60}", repo_id) is not None
        if not is_valid:
            logger.error(
                "The repo_id must be between 30 and 60 characters long, and consist of only "
                "alphanumeric characters, dashes and underscores."
            )
        return is_valid

    def __call__(
        self,
        *,
        started_workunits: tuple[Workunit, ...],
        completed_workunits: tuple[Workunit, ...],
        finished: bool,
        context: StreamingWorkunitContext,
    ) -> None:
        """Send the telemetry report for this run, if the run is over and telemetry is set up.

        :param started_workunits: Unused.
        :param completed_workunits: Unused.
        :param finished: Whether the run has ended; nothing happens until it has.
        :param context: Supplies the RunTracker from which the telemetry data is read.
        """
        if not finished:
            return

        telemetry = self._anonymous_telemetry

        if telemetry.options.is_default("enabled"):
            # The user has not made an explicit choice either way: nag them to make one.
            logger.warning(
                "Please either set `enabled = true` in the [anonymous-telemetry] section of "
                "pants.toml to enable sending anonymous stats to the Pants project to aid "
                "development, or set `enabled = false` to disable it. No telemetry sent "
                "for this run. An explicit setting will get rid of this message. "
                f"{_telemetry_docs_referral}."
            )

        if not telemetry.enabled:
            return

        repo_id = telemetry.repo_id
        if repo_id is None:
            logger.error(
                'Please set `repo_id = "<uuid>"` in the [anonymous-telemetry] section '
                "of pants.toml, where `<uuid>` is some fixed random identifier, such as "
                "one generated by uuidgen. No telemetry sent for this run. "
                f"{_telemetry_docs_referral}."
            )
            return
        if not self.validate_repo_id(repo_id):
            # validate_repo_id has already logged what is wrong with the ID.
            return

        # Assemble and send the telemetry.
        # Note that this method is called with finished=True only after the
        # StreamingWorkunitHandler context ends, i.e., after end_run() has been called,
        # so the RunTracker will have had a chance to finalize its state.
        telemetry_data = context.run_tracker.get_anonymous_telemetry_data(repo_id)
        # TODO: Add information about any errors that occurred.

        reporter = Reporter(
            name="pantsbuild/pants",
            # We've already established consent at this point.
            consent=HumbugConsent(True),
            session_id=telemetry_data.get("run_id", str(uuid.uuid4())),
            bugout_token=_bugout_access_token,
            bugout_journal_id=_bugout_journal_id,
            timeout_seconds=5,
            # We don't want to spawn a thread in the engine, and we're
            # already running in a background thread in pantsd.
            mode=Modes.SYNCHRONOUS,
        )

        # This is copied from humbug code, to ensure that future changes to humbug
        # don't add tags that inadvertently violate our anonymity promise.
        system_info = reporter.system_information
        system_tags = [
            "humbug",
            f"source:{reporter.name}",
            f"os:{system_info.os}",
            f"arch:{system_info.machine}",
            f"python:{system_info.python_version_major}",
            f"python:{system_info.python_version_major}.{system_info.python_version_minor}",
            f"python:{system_info.python_version}",
            f"session:{reporter.session_id}",
        ]
        pants_tags = [f"pants_version:{telemetry_data.get('pants_version')}"]
        goal_tags = [f"goal:{goal}" for goal in telemetry_data.get("goals", [])]

        report = Report(
            title=f"pants run {reporter.session_id}",
            tags=system_tags + pants_tags + goal_tags,
            content=json.dumps(telemetry_data, sort_keys=True),
        )
        reporter.publish(report)


class AnonymousTelemetryCallbackFactoryRequest:
    """Marker request type for this module's WorkunitsCallback.

    It carries no data; it exists only so that it can be registered as a member of the
    WorkunitsCallbackFactoryRequest union and thereby trigger construction of the callback.
    """


@rule
def construct_callback(
    _: AnonymousTelemetryCallbackFactoryRequest, anonymous_telemetry: AnonymousTelemetry
) -> WorkunitsCallbackFactory:
    """Return a factory that builds an AnonymousTelemetryCallback for the given subsystem."""

    def make_callback() -> AnonymousTelemetryCallback:
        # Deferred construction: the callback is only created when the factory is invoked.
        return AnonymousTelemetryCallback(anonymous_telemetry)

    return WorkunitsCallbackFactory(make_callback)


def rules():
    """Return the union registration for the callback request plus this module's rules."""
    callback_union = UnionRule(
        WorkunitsCallbackFactoryRequest, AnonymousTelemetryCallbackFactoryRequest
    )
    # collect_rules() is called directly from this function body, exactly as before.
    return [callback_union, *collect_rules()]
60 changes: 60 additions & 0 deletions src/python/pants/goal/anonymous_telemetry_integration_test.py
@@ -0,0 +1,60 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

from pants.testutil.pants_integration_test import run_pants

# Substrings of the log messages that the tests below look for in the stderr of a Pants run:
# one for "enabled" never having been set explicitly, one for a missing repo_id, and one for a
# malformed repo_id.
_no_explicit_setting_msg = "An explicit setting will get rid of this message"
_no_repo_id_msg = 'set `repo_id = "<uuid>"` in the [anonymous-telemetry] section of pants.toml'
_bad_repo_id_msg = "The repo_id must be between 30 and 60 characters long"


def test_warn_if_no_explicit_setting() -> None:
    """With an empty config, only the "explicit setting" message shows up in stderr."""
    pants_run = run_pants(["roots"], config={}, use_pantsd=False)
    pants_run.assert_success()

    stderr = pants_run.stderr
    assert _no_explicit_setting_msg in stderr
    for unexpected in (_no_repo_id_msg, _bad_repo_id_msg):
        assert unexpected not in stderr


def test_warn_if_repo_id_unset() -> None:
    """Enabled telemetry without a repo_id yields only the "set repo_id" message."""
    telemetry_config = {"anonymous-telemetry": {"enabled": True}}
    pants_run = run_pants(["roots"], config=telemetry_config, use_pantsd=False)
    pants_run.assert_success()

    stderr = pants_run.stderr
    assert _no_repo_id_msg in stderr
    for unexpected in (_no_explicit_setting_msg, _bad_repo_id_msg):
        assert unexpected not in stderr


def test_warn_if_repo_id_invalid() -> None:
    """Enabled telemetry with a too-short repo_id yields only the "bad repo_id" message."""
    telemetry_config = {"anonymous-telemetry": {"enabled": True, "repo_id": "tooshort"}}
    pants_run = run_pants(["roots"], config=telemetry_config, use_pantsd=False)
    pants_run.assert_success()

    stderr = pants_run.stderr
    assert _bad_repo_id_msg in stderr
    for unexpected in (_no_explicit_setting_msg, _no_repo_id_msg):
        assert unexpected not in stderr


def test_no_warn_if_explicitly_on() -> None:
    """Enabled telemetry with a well-formed repo_id produces none of the messages."""
    telemetry_config = {"anonymous-telemetry": {"enabled": True, "repo_id": 36 * "a"}}
    pants_run = run_pants(["roots"], config=telemetry_config, use_pantsd=False)
    pants_run.assert_success()

    stderr = pants_run.stderr
    for unexpected in (_no_explicit_setting_msg, _no_repo_id_msg, _bad_repo_id_msg):
        assert unexpected not in stderr


def test_no_warn_if_explicitly_off() -> None:
    """Explicitly disabled telemetry produces none of the messages."""
    # Run without pantsd, like the other tests in this file: the telemetry callback is allowed
    # to finish asynchronously, so with pantsd the absence of a message in stderr would not
    # show that the message was never logged.
    result = run_pants(
        ["roots"], config={"anonymous-telemetry": {"enabled": False}}, use_pantsd=False
    )
    result.assert_success()
    assert _no_explicit_setting_msg not in result.stderr
    assert _no_repo_id_msg not in result.stderr
    assert _bad_repo_id_msg not in result.stderr
35 changes: 35 additions & 0 deletions src/python/pants/goal/anonymous_telemetry_test.py
@@ -0,0 +1,35 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import pytest

from pants.goal.anonymous_telemetry import AnonymousTelemetryCallback


@pytest.mark.parametrize(
    "repo_id",
    (
        30 * "a",
        31 * "2",
        60 * "C",
        "c1db8737-06b4-4aa8-b18f-8cde023eb524",
        "D2E39BA4_BA82_4A85_99DC_9E99E4528D3F",
    ),
)
def test_valid_repo_ids(repo_id) -> None:
    """IDs of 30-60 characters made of letters, digits, dashes or underscores are accepted."""
    accepted = AnonymousTelemetryCallback.validate_repo_id(repo_id)
    assert accepted


@pytest.mark.parametrize(
    "repo_id",
    (
        "",
        "x",
        29 * "a",
        61 * "2",
        "@c1db8737-06b4-4aa8-b18f-8cde023eb524",
        "D2E39BA4-BA82-4A85-99DC-9Eá9E4528D3F",
    ),
)
def test_invalid_repo_ids(repo_id) -> None:
    """IDs that are too short, too long, or contain a disallowed character are rejected."""
    accepted = AnonymousTelemetryCallback.validate_repo_id(repo_id)
    assert not accepted