Skip to content
This repository has been archived by the owner on May 28, 2024. It is now read-only.

Commit

Permalink
Fix merging statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
scholtzan committed Oct 12, 2022
1 parent 95de66b commit 0476677
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 10 deletions.
20 changes: 17 additions & 3 deletions jetstream_config_parser/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,11 @@ def validate(self, configs: "ConfigCollection", experiment: Experiment) -> None:
raise ValueError("dataset_id needs to be explicitly set for private experiments")
elif isinstance(self.spec, MonitoringSpec):
monitoring_spec = MonitoringSpec.default_for_platform_or_type(
experiment.app_name, configs
(experiment.app_name if experiment else self.spec.project.platform)
or "firefox_desktop",
configs,
)
if experiment.is_rollout:
if experiment and experiment.is_rollout:
rollout_spec = MonitoringSpec.default_for_platform_or_type("rollout", configs)
monitoring_spec.merge(rollout_spec)
monitoring_spec.merge(self.spec)
Expand All @@ -79,13 +81,17 @@ def validate_config_settings(config_file: Path) -> None:
config = toml.loads(config_file.read_text())

optional_core_config_keys = (
"project",
"population",
"metrics",
"experiment",
"segments",
"data_sources",
"friendly_name",
"description",
"parameters",
"alerts",
"dimensions",
)

core_config_keys_specified = config.keys()
Expand Down Expand Up @@ -279,6 +285,7 @@ def from_github_repo(
with TemporaryDirectory() as tmp_dir:
if repo_url is not None and Path(repo_url).exists() and Path(repo_url).is_dir():
repo = Repo(repo_url)
tmp_dir = Path(repo_url)
else:
repo = Repo.clone_from(repo_url or cls.repo_url, tmp_dir)

Expand Down Expand Up @@ -369,7 +376,7 @@ def from_github_repos(
cls, repo_urls: Optional[List[str]] = None, is_private: bool = False
) -> "ConfigCollection":
"""Load configs from the provided repos."""
if repo_urls is None or len(repo_urls) > 0:
if repo_urls is None or len(repo_urls) < 1:
return ConfigCollection.from_github_repo()

configs = None
Expand Down Expand Up @@ -415,6 +422,13 @@ def get_platform_defaults(self, platform: str) -> Optional[DefinitionSpecSub]:

return None

def get_platform_definitions(self, platform: str) -> Optional[DefinitionSpecSub]:
for definition in self.definitions:
if platform == definition.slug:
return definition.spec

return None

def get_metric_definition(self, slug: str, app_name: str) -> Optional[MetricDefinition]:
for definition in self.definitions:
if app_name == definition.platform:
Expand Down
3 changes: 3 additions & 0 deletions jetstream_config_parser/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class DataSource:
client_id_column = attr.ib(default="client_id", type=str)
submission_date_column = attr.ib(default="submission_date", type=str)
default_dataset = attr.ib(default=None, type=Optional[str])
build_id_column = attr.ib(default="SAFE.SUBSTR(application.build_id, 0, 8)", type=str)

EXPERIMENT_COLUMN_TYPES = (None, "simple", "native", "glean")

Expand Down Expand Up @@ -121,6 +122,7 @@ class DataSourceDefinition:
client_id_column: Optional[str] = "client_id"
submission_date_column: Optional[str] = "submission_date"
default_dataset: Optional[str] = None
build_id_column: str = "SAFE.SUBSTR(application.build_id, 0, 8)"

def resolve(self, spec: "DefinitionSpecSub") -> DataSource:
params: Dict[str, Any] = {"name": self.name, "from_expression": self.from_expression}
Expand All @@ -130,6 +132,7 @@ def resolve(self, spec: "DefinitionSpecSub") -> DataSource:
"client_id_column",
"submission_date_column",
"default_dataset",
"build_id_column",
):
v = getattr(self, k)
if v:
Expand Down
21 changes: 20 additions & 1 deletion jetstream_config_parser/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ class Metric:
description: Optional[str] = None
bigger_is_better: bool = True
analysis_bases: List[AnalysisBasis] = [AnalysisBasis.ENROLLMENTS]
type: str = "scalar"
category: Optional[str] = None


@attr.s(auto_attribs=True)
Expand Down Expand Up @@ -115,6 +117,8 @@ class MetricDefinition:
description: Optional[str] = None
bigger_is_better: bool = True
analysis_bases: Optional[List[AnalysisBasis]] = None
type: Optional[str] = None
category: Optional[str] = None

@staticmethod
def generate_select_expression(
Expand Down Expand Up @@ -189,6 +193,8 @@ def resolve(
description=dedent(self.description) if self.description else self.description,
bigger_is_better=self.bigger_is_better,
analysis_bases=self.analysis_bases or [AnalysisBasis.ENROLLMENTS],
type=self.type or "scalar",
category=self.category,
)

metrics_with_treatments = []
Expand Down Expand Up @@ -243,6 +249,11 @@ def resolve(

return metrics_with_treatments

def merge(self, other: "MetricDefinition"):
"""Merge with another metric definition."""
for key in attr.fields_dict(type(self)):
setattr(self, key, getattr(other, key) or getattr(self, key))


MetricsConfigurationType = Dict[AnalysisPeriod, List[Summary]]

Expand Down Expand Up @@ -317,7 +328,15 @@ def merge(self, other: "MetricsSpec"):
self.weekly += other.weekly
self.days28 += other.days28
self.overall += other.overall
self.definitions.update(other.definitions)

seen = []
for key, _ in self.definitions.items():
if key in other.definitions:
self.definitions[key].merge(other.definitions[key])
seen.append(key)
for key, definition in other.definitions.items():
if key not in seen:
self.definitions[key] = definition


converter.register_structure_hook(MetricsSpec, lambda obj, _type: MetricsSpec.from_dict(obj))
4 changes: 2 additions & 2 deletions jetstream_config_parser/monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def from_definition_spec(
) -> "MonitoringSpec":
from jetstream_config_parser.definition import DefinitionSpec

if not isinstance(spec, MonitoringSpec) or not isinstance(spec, DefinitionSpec):
if not isinstance(spec, MonitoringSpec) and not isinstance(spec, DefinitionSpec):
raise ValueError(f"Cannot create MonitoringSpec from {spec}")

if project is None:
Expand Down Expand Up @@ -165,7 +165,7 @@ def default_for_platform_or_type(
"""Return the default config for the provided platform."""
default_metrics = configs.get_platform_defaults(platform)

if default_metrics is None:
if default_metrics is None or not hasattr(default_metrics, "project"):
spec = cls()
else:
spec = cls.from_definition_spec(default_metrics)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def test_configs_from_multiple_repos(self):
assert config_collection.functions is not None

default_collection = ConfigCollection.from_github_repo()
assert config_collection.configs == default_collection.configs
assert len(config_collection.configs) == len(default_collection.configs)
assert config_collection.outcomes == default_collection.outcomes
assert config_collection.defaults == default_collection.defaults
assert config_collection.definitions == default_collection.definitions
assert len(config_collection.defaults) == len(default_collection.defaults)
assert len(config_collection.definitions) == len(default_collection.definitions)
39 changes: 39 additions & 0 deletions jetstream_config_parser/tests/test_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest
import toml

from jetstream_config_parser.definition import DefinitionSpec
from jetstream_config_parser.monitoring import MonitoringConfiguration, MonitoringSpec

TEST_DIR = Path(__file__).parent
Expand Down Expand Up @@ -166,6 +167,44 @@ def test_merge(self, config_collection):
assert test.metric.data_source.from_expression == "bar"
assert test2.metric.select_expression == "SELECT 2"

def test_merge_statistic(self, config_collection):
"""Test merging configs"""
config_str = dedent(
"""
[metrics]
[metrics.test]
select_expression = "SELECT 1"
data_source = "foo"
type = "histogram"
[data_sources]
[data_sources.foo]
from_expression = "test"
"""
)
spec = MonitoringSpec.from_definition_spec(DefinitionSpec.from_dict(toml.loads(config_str)))

config_str = dedent(
"""
[project]
name = "foo"
metrics = ["test"]
[metrics.test.statistics]
sum = {}
"""
)
spec2 = MonitoringSpec.from_dict(toml.loads(config_str))
spec.merge(spec2)
cfg = spec.resolve(experiment=None, configs=config_collection)

assert cfg.project.name == "foo"
test = [p for p in cfg.metrics if p.metric.name == "test"][0]
assert test.metric.select_expression == "SELECT 1"
assert test.metric.data_source.name == "foo"
assert test.metric.type == "histogram"
assert test.statistic.name == "sum"

def test_unknown_metric_failure(self, config_collection):
config_str = dedent(
"""
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,5 @@ def text_from_file(path):
long_description=text_from_file("README.md"),
long_description_content_type="text/markdown",
python_requires=">=3.6",
version="2022.10.4",
version="2022.10.5",
)

0 comments on commit 0476677

Please sign in to comment.