Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 94 additions & 89 deletions .github/workflows/build-and-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,108 +12,113 @@ on:
merge_group:

jobs:
build:
environment: GH Actions
permissions:
contents: read
id-token: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
cache: 'pip'
- name: Build venv and install dependencies
run: |
python -m venv venv/
venv/bin/pip install --no-deps --progress-bar off --upgrade -r requirements.txt
- name: Ruff lint
run: venv/bin/ruff check jetstream
- name: Ruff format
run: venv/bin/ruff format --check jetstream
- name: Mypy
run: venv/bin/mypy jetstream
- name: Authenticate to GCP and Generate ID Token
id: auth
uses: google-github-actions/auth@v2
with:
workload_identity_provider: ${{ vars.GCPV2_GITHUB_WORKLOAD_IDENTITY_PROVIDER }}
service_account: ${{ secrets.GCP_DRYRUN_SERVICE_ACCOUNT_EMAIL }}
token_format: 'id_token'
id_token_audience: 'https://us-central1-moz-fx-data-shared-prod.cloudfunctions.net/dryrun'
id_token_include_email: true
- name: Export ID Token for Python
env:
GOOGLE_GHA_ID_TOKEN: ${{ steps.auth.outputs.id_token }}
run: echo "GOOGLE_GHA_ID_TOKEN=$GOOGLE_GHA_ID_TOKEN" >> $GITHUB_ENV
- name: PyTest
run: venv/bin/pytest --ruff --ruff-format --ignore=jetstream/tests/integration/ -n 8
# build:
# environment: GH Actions
# permissions:
# contents: read
# id-token: write
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# - name: Set up Python
# uses: actions/setup-python@v5
# with:
# python-version: '3.11'
# cache: 'pip'
# - name: Build venv and install dependencies
# run: |
# python -m venv venv/
# venv/bin/pip install --no-deps --progress-bar off --upgrade -r requirements.txt
# - name: Ruff lint
# run: venv/bin/ruff check jetstream
# - name: Ruff format
# run: venv/bin/ruff format --check jetstream
# - name: Mypy
# run: venv/bin/mypy jetstream
# - name: Authenticate to GCP and Generate ID Token
# id: auth
# uses: google-github-actions/auth@v2
# with:
# workload_identity_provider: ${{ vars.GCPV2_GITHUB_WORKLOAD_IDENTITY_PROVIDER }}
# service_account: ${{ secrets.GCP_DRYRUN_SERVICE_ACCOUNT_EMAIL }}
# token_format: 'id_token'
# id_token_audience: 'https://us-central1-moz-fx-data-shared-prod.cloudfunctions.net/dryrun'
# id_token_include_email: true
# - name: Export ID Token for Python
# env:
# GOOGLE_GHA_ID_TOKEN: ${{ steps.auth.outputs.id_token }}
# run: echo "GOOGLE_GHA_ID_TOKEN=$GOOGLE_GHA_ID_TOKEN" >> $GITHUB_ENV
# - name: PyTest
# run: venv/bin/pytest --ruff --ruff-format --ignore=jetstream/tests/integration/ -n 8

integration:
permissions:
contents: read
id-token: write
environment: GH Actions
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Build venv and install dependencies
run: |
python -m venv venv/
venv/bin/pip install --no-deps --progress-bar off --upgrade -r requirements.txt
- name: Authenticate to GCP (OIDC)
uses: google-github-actions/auth@v2
with:
workload_identity_provider: ${{ vars.GCPV2_GITHUB_WORKLOAD_IDENTITY_PROVIDER }}
service_account: ${{ secrets.GCP_INTEGRATION_SERVICE_ACCOUNT_EMAIL }}
- name: PyTest Integration Test
run: venv/bin/pytest --ruff --ruff-format --integration jetstream/tests/integration/ -n 8
# integration:
# permissions:
# contents: read
# id-token: write
# environment: GH Actions
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# - name: Set up Python
# uses: actions/setup-python@v5
# with:
# python-version: '3.11'
# - name: Build venv and install dependencies
# run: |
# python -m venv venv/
# venv/bin/pip install --no-deps --progress-bar off --upgrade -r requirements.txt
# - name: Authenticate to GCP (OIDC)
# uses: google-github-actions/auth@v2
# with:
# workload_identity_provider: ${{ vars.GCPV2_GITHUB_WORKLOAD_IDENTITY_PROVIDER }}
# service_account: ${{ secrets.GCP_INTEGRATION_SERVICE_ACCOUNT_EMAIL }}
# - name: PyTest Integration Test
# run: venv/bin/pytest --ruff --ruff-format --integration jetstream/tests/integration/ -n 8

deploy:
permissions:
contents: read
id-token: write
environment:
name: pypi
url: https://pypi.org/p/mozilla-jetstream
if: startsWith(github.ref, 'refs/tags/')
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install deployment tools
run: pip install --upgrade build setuptools wheel twine
- name: Build distribution files
run: python -m build --sdist
- name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
# deploy:
# permissions:
# contents: read
# id-token: write
# environment:
# name: pypi
# url: https://pypi.org/p/mozilla-jetstream
# if: startsWith(github.ref, 'refs/tags/')
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# - name: Set up Python
# uses: actions/setup-python@v5
# with:
# python-version: '3.11'
# - name: Install deployment tools
# run: pip install --upgrade build setuptools wheel twine
# - name: Build distribution files
# run: python -m build --sdist
# - name: Publish distribution 📦 to PyPI
# uses: pypa/gh-action-pypi-publish@release/v1

docker:
permissions:
contents: read
id-token: write
environment: GH Actions
if: github.ref == 'refs/heads/main'
# if: github.ref == 'refs/heads/main'
runs-on: ubuntu-latest
needs: [build, integration]
# needs: [build, integration]
steps:
- uses: actions/checkout@v4
- name: Build Docker image
run: docker build . -t jetstream:latest
if: ${{ ! startsWith(github.event.pull_request.title , '[ci breaking-change]') }}
run: docker build . -t gcr.io/moz-fx-data-experiments/jetstream:latest
- name: Build Docker image (breaking tag)
if: ${{ startsWith(github.event.pull_request.title , '[ci breaking-change]') }}
run: docker build . -t gcr.io/moz-fx-data-experiments/jetstream:latest -t gcr.io/moz-fx-data-experiments/jetstream:breaking
- name: Push Docker image to GAR
uses: mozilla-it/deploy-actions/docker-push@v3
uses: mozilla-it/deploy-actions/docker-push@b9d76236c1d1dd89daab4ea15651cbe76574267a
with:
project_id: moz-fx-data-experiments
local_image: jetstream
image_repo_host: gcr.io
image_repo_path: moz-fx-data-experiments/jetstream
image_tags: |-
gcr.io/moz-fx-data-experiments/jetstream:latest
gcr.io/moz-fx-data-experiments/jetstream:breaking
workload_identity_pool_project_number: ${{ vars.GCPV2_WORKLOAD_IDENTITY_POOL_PROJECT_NUMBER }}
service_account_name: artifact-writer
27 changes: 27 additions & 0 deletions .github/workflows/detect-possible-breaking-change.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Posts a warning comment on PRs that modify the Argo workflow definition,
# since such changes may indicate a breaking change to the Jetstream image.
name: Jetstream Detect Possible Breaking Change

on:
pull_request:
branches:
- main
# only fire when the Argo workflow definition is touched
paths:
- 'jetstream/workflows/run.yaml'
# NOTE(review): merge_group events are not tied to a single PR, so
# context.issue.number may be unset for this trigger — confirm it is intended.
merge_group:

jobs:
breaking-warning:
permissions:
# required for github-script to post the PR comment below
pull-requests: write
runs-on: ubuntu-latest
steps:
- name: Possible Breaking Change Message
uses: actions/github-script@v8
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
# JavaScript run by actions/github-script; comments on the triggering PR
script: |
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: "⚠️ Detected changes to the Argo workflow, which could indicate breaking changes.\n\n Do you want to mark this image with the `breaking` tag using `[ci breaking-change]`?"
});
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ Jetstream automatically calculates metrics and applies statistical treatments to

For more information, see [the documentation](https://experimenter.info/jetstream/jetstream/).

## Deployment

Jetstream automatically builds and pushes a docker image when a PR is merged into the main branch.

Use the `[ci breaking-change]` prefix on a PR title to give the image the `breaking` tag. This tag helps Jetstream's automated versioning determine when an incompatible change has landed, so it can bump up to at least the latest compatible version and allow analysis to succeed.

## Local installation

```bash
Expand Down
27 changes: 24 additions & 3 deletions jetstream/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

import attr
from google.cloud import artifactregistry
from google.cloud.artifactregistry import DockerImage

from jetstream.bigquery_client import BigQueryClient


def _hash_for_image(image):
return image.name.split("sha256:")[1]


@attr.s(auto_attribs=True, slots=True)
class ArtifactManager:
"""Access docker images in the artifact registry."""
Expand Down Expand Up @@ -42,11 +47,19 @@ def image_for_slug(self, slug: str) -> str:
Get the image that should be used to analyse the experiment with the provided slug.

The image is determined based on the oldest last updated timestamp of the analysis results
(in other words, the timestamp from the first time the experiment was analyzed).
(in other words, the timestamp from the first time the experiment was analyzed), unless
there is a newer image with the `breaking` tag, in which case this image is used.
"""
client = BigQueryClient(self.project, self.dataset)
last_updated = client.experiment_table_first_updated(slug=slug)

breaking_image = self._image_with_tag("breaking")
if breaking_image:
breaking_time = breaking_image.upload_time
# see note below about mypy ignore here
if last_updated and last_updated < breaking_time: # type: ignore
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess in theory if a breaking image is pushed around the time an analysis run is triggered/running (which uses the previous image version), the created tables for new experiments might end up having a creation timestamp < breaking_time. I don't expect that this is likely to happen, so probably not worth thinking too much about it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm I think you mean this scenario?

  1. analysis starts but has not written to any tables
  2. breaking image is pushed
  3. analysis writes to a table
    This is ok because a subsequent run should pick up the breaking image using the existing logic in _image_for_date (breaking image upload time is the latest image at the time of the table creation). In the other direction, if the breaking image is pushed just after table creation, then this check handles that (tables created before breaking image upload, so use the breaking image).

Or am I misunderstanding your scenario?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's the scenario I was thinking of. I was ignoring the _image_for_date, but makes sense. This shouldn't be a concern then

last_updated = breaking_time # type: ignore

if last_updated:
return self._image_for_date(last_updated)
else:
Expand Down Expand Up @@ -86,13 +99,21 @@ def _image_for_date(self, date: datetime) -> str:

if latest_updated:
# return hash of image closest to the provided date
return latest_updated.name.split("sha256:")[1]
return _hash_for_image(latest_updated)
elif earliest_uploaded:
# return hash of earliest image available if table got created before image got uploaded
return earliest_uploaded.name.split("sha256:")[1]
return _hash_for_image(earliest_uploaded)
else:
raise ValueError(f"No `{self.image}` docker image available in {self.project}")

def latest_image(self) -> str:
    """Return the latest docker image hash (the image closest to now)."""
    now = datetime.now(timezone.utc)
    return self._image_for_date(date=now)

def _image_with_tag(self, tag) -> DockerImage | None:
"""Return the docker image for a given tag (or None if tag is not found)."""
for image in self.images:
if tag in image.tags:
return image

return None
12 changes: 6 additions & 6 deletions jetstream/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,20 +272,20 @@ def docker_images():
artifactregistry.DockerImage(
name="projects/moz-fx-data-experiments/locations/us/repositories/"
+ "gcr.io/dockerImages/jetstream@sha256:8c766a",
update_time=timestamp_pb2.Timestamp(seconds=1672578000), # 2023-01-01 01:00 am
upload_time=timestamp_pb2.Timestamp(seconds=1672578000), # 2023-01-01 01:00 am
update_time=timestamp_pb2.Timestamp(seconds=1672578000), # 2023-01-01 08:00 AM EST
upload_time=timestamp_pb2.Timestamp(seconds=1672578000), # 2023-01-01 08:00 AM EST
),
artifactregistry.DockerImage(
name="projects/moz-fx-data-experiments/locations/us/repositories/"
+ "gcr.io/dockerImages/unrelated@sha256:aaaaa",
update_time=timestamp_pb2.Timestamp(seconds=1677675600), # 2023-03-01 01:00 am
upload_time=timestamp_pb2.Timestamp(seconds=1672578000), # 2023-01-01 01:00 am
update_time=timestamp_pb2.Timestamp(seconds=1677675600), # 2023-03-01 08:00 AM EST
upload_time=timestamp_pb2.Timestamp(seconds=1672578000), # 2023-01-01 08:00 AM EST
),
artifactregistry.DockerImage(
name="projects/moz-fx-data-experiments/locations/us/repositories/"
+ "gcr.io/dockerImages/jetstream@sha256:xxxxx",
update_time=timestamp_pb2.Timestamp(seconds=1677675600), # 2023-03-01 01:00 am
upload_time=timestamp_pb2.Timestamp(seconds=1677675600), # 2023-03-01 01:00 am
update_time=timestamp_pb2.Timestamp(seconds=1677675600), # 2023-03-01 08:00 AM EST
upload_time=timestamp_pb2.Timestamp(seconds=1677675600), # 2023-03-01 08:00 AM EST
),
]

Expand Down
23 changes: 23 additions & 0 deletions jetstream/tests/test_artifacts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime, timezone
from unittest import mock
from unittest.mock import MagicMock

import pytest
Expand Down Expand Up @@ -53,3 +54,25 @@ def test_image_for_date(self, docker_images):

image = artifact_manager._image_for_date(date=pytz.UTC.localize(datetime(2019, 2, 1)))
assert image == "8c766a"

@pytest.mark.parametrize(
("tags", "expected_image"), [(["breaking"], "xxxxx"), (["non-breaking"], "8c766a")]
)
# Verifies ArtifactManager.image_for_slug: when the most recent fixture image
# carries the `breaking` tag it wins ("xxxxx"); otherwise the image matching
# the experiment table's first-update time is chosen ("8c766a").
def test_image_for_slug(self, docker_images, tags, expected_image):
artifact_client = MagicMock()
# update tags on most recent image
docker_images[2].tags = tags
# mocked registry client serves the (possibly retagged) fixture images
artifact_client.list_docker_images.return_value = docker_images

# pin the "first analyzed" timestamp to 2023-01-01 UTC — presumably earlier
# than the fixtures' breaking-image upload time, so the breaking branch fires
with mock.patch("jetstream.artifacts.BigQueryClient") as mock_client:
bigquery_mock_client = MagicMock()
bigquery_mock_client.experiment_table_first_updated.return_value = datetime(
2023, 1, 1, tzinfo=pytz.UTC
)
mock_client.return_value = bigquery_mock_client

artifact_manager = ArtifactManager(
"moz-fx-data-experiments", "mozanalysis", "jetstream", artifact_client
)
image = artifact_manager.image_for_slug("test-slug")
assert image == expected_image
Loading