From d57390b6eb3c7ae3a3d90f085722c9a4cbd9c756 Mon Sep 17 00:00:00 2001
From: Jover Lee
Date: Fri, 10 Apr 2026 15:47:04 -0700
Subject: [PATCH 1/3] Add GH Action workflow to update downstream repos

Searches the Nextstrain GitHub org to find repos that have the
`.gitrepo` file with the nextstrain/shared remote to create a matrix of
repos to potentially update.

Installs and uses `git subrepo` to pull in the latest changes with the
`--force` flag to avoid merge conflicts due to rebasing in the
downstream repo.

If there are changes pulled down, then `git subrepo` will create a
single commit. If there is a single commit, then push up the changes to
a branch and create or update the PR in the downstream repo. Nothing
happens if there were no changes and workflow exits with error if it
encounters more than one commit.
---
Review note: the workflow content in this series was amended during
review (search page size, PR body link and typo, comment typo), so the
blob hashes on the `index` lines no longer match the content. Apply
with plain `git am` / `git apply` (no 3-way merge).

 .../workflows/update-downstream-repos.yaml    | 102 ++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 .github/workflows/update-downstream-repos.yaml

diff --git a/.github/workflows/update-downstream-repos.yaml b/.github/workflows/update-downstream-repos.yaml
new file mode 100644
index 0000000..f54cca6
--- /dev/null
+++ b/.github/workflows/update-downstream-repos.yaml
@@ -0,0 +1,102 @@
+name: Update downstream repos
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}
+  cancel-in-progress: true
+
+jobs:
+  build-downstream-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      downstream-matrix: ${{ steps.downstream-matrix.outputs.downstream-matrix }}
+    steps:
+      - id: downstream-matrix
+        env:
+          GH_TOKEN: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_REPO }}
+        # Create the array of candidate repos used as the next job's matrix.
+        # per_page=100 is the API max; the default of 30 could truncate it.
+        # [
+        #   { "repo": "nextstrain/zika", "path": "shared/vendored"},
+        #   { "repo": "nextstrain/mpox", "path": "shared/vendored"},
+        #   ...
+        # ]
+        run: |
+          matrix=$(gh api -X GET search/code -f per_page=100 \
+            -f q='org:nextstrain filename:.gitrepo "remote = https://github.com/nextstrain/shared"' \
+            | jq -c '
+              .items
+              | map({
+                "repo": "\(.repository.full_name)",
+                "path": "\(.path | split("/")[0:-1] | join("/"))"
+              })
+            ')
+          echo "downstream-matrix=$matrix" | tee -a "$GITHUB_OUTPUT"
+  update-downstream:
+    name: update-downstream (${{ matrix.repo }}, ${{ matrix.path }})
+    needs: [build-downstream-matrix]
+    strategy:
+      fail-fast: false
+      matrix:
+        include: ${{ fromJson(needs.build-downstream-matrix.outputs.downstream-matrix) }}
+    env:
+      GIT_SUBREPO_DIR: .git/git-subrepo
+      VENDORED_PATH: ${{ matrix.path }}
+      branch: nextstrain-bot/update-vendored
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout ${{ matrix.repo }}
+        uses: actions/checkout@v6
+        with:
+          repository: ${{ matrix.repo }}
+          token: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_REPO }}
+      # Checkout git-subrepo _after_ the downstream repo to ensure that we
+      # keep it in a path within the downstream repo that does not interfere
+      # with the subrepo changes
+      - name: Checkout git-subrepo
+        uses: actions/checkout@v6
+        with:
+          repository: "ingydotnet/git-subrepo"
+          path: ${{ env.GIT_SUBREPO_DIR }}
+      - name: Add git-subrepo to PATH
+        run: echo "$GIT_SUBREPO_DIR/lib" >> "$GITHUB_PATH"
+      - name: Update vendored path
+        run: |
+          git config user.name "${{ vars.GIT_USER_NAME_NEXTSTRAIN_BOT }}"
+          git config user.email "${{ vars.GIT_USER_EMAIL_NEXTSTRAIN_BOT }}"
+
+          git switch -c "$branch"
+          git subrepo pull "$VENDORED_PATH" --force
+      - name: Create pull request
+        env:
+          GH_TOKEN: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_REPO }}
+          title: '[bot] Update ${{ env.VENDORED_PATH }}'
+          body: |
+            This PR was automatically created by https://github.com/nextstrain/shared/actions/runs/${{ github.run_id }}
+            to update the vendored subrepo in ${{ env.VENDORED_PATH }}.
+
+            Subrepo updates were made with the `--force` flag so it overwrites any local changes in the subrepo.
+        run: |
+          default_branch=$(git remote show origin | sed -n '/HEAD branch/s/.*: //p')
+          changes=$(git rev-list --count "$default_branch".."$branch")
+          if [[ "$changes" == "1" ]]; then
+            git push --force origin HEAD
+            pr_url=$(gh pr list --head "$branch" --json url | jq -r '.[0].url')
+
+            if [[ "$pr_url" == "null" ]]; then
+              pr_url="$(gh pr create --head "$branch" --title "$title" --body "$body")"
+              echo "Pull request created: $pr_url" >> "$GITHUB_STEP_SUMMARY"
+            else
+              echo "Pull request updated: $pr_url" >> "$GITHUB_STEP_SUMMARY"
+            fi
+          elif [[ "$changes" == "0" ]]; then
+            echo "No pull request created or updated because no changes were made" >> "$GITHUB_STEP_SUMMARY"
+          else
+            echo "ERROR: Encountered an unexpected number of changes: $changes"
+            exit 1
+          fi

From ada31609c3ace9f3cf3688122b562ccb30fbacd3 Mon Sep 17 00:00:00 2001
From: Jover Lee
Date: Tue, 14 Apr 2026 14:46:37 -0700
Subject: [PATCH 2/3] .github/update-downstream-repos: Include old nextstrain/ingest

I was unable to get the search/code API to work with the 'OR' syntax so
just added a separate query for nextstrain/ingest and concatenated the
two arrays. Deduplicated the final array to guard against potential
overlap.
---
 .../workflows/update-downstream-repos.yaml    | 24 ++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/update-downstream-repos.yaml b/.github/workflows/update-downstream-repos.yaml
index f54cca6..0fea325 100644
--- a/.github/workflows/update-downstream-repos.yaml
+++ b/.github/workflows/update-downstream-repos.yaml
@@ -27,7 +27,7 @@ jobs:
         #   ...
         # ]
         run: |
-          matrix=$(gh api -X GET search/code -f per_page=100 \
+          shared_matrix=$(gh api -X GET search/code -f per_page=100 \
             -f q='org:nextstrain filename:.gitrepo "remote = https://github.com/nextstrain/shared"' \
             | jq -c '
               .items
@@ -36,6 +36,28 @@ jobs:
                 "path": "\(.path | split("/")[0:-1] | join("/"))"
               })
             ')
+
+          # I was unable to get the 'OR' syntax to work with the search/code API,
+          # so making a separate query for the old nextstrain/ingest repo name.
+          # -Jover, 14 Apr 2026.
+          ingest_matrix=$(gh api -X GET search/code -f per_page=100 \
+            -f q='org:nextstrain filename:.gitrepo "remote = https://github.com/nextstrain/ingest"' \
+            | jq -c '
+              .items
+              | map({
+                "repo": "\(.repository.full_name)",
+                "path": "\(.path | split("/")[0:-1] | join("/"))"
+              })
+            ')
+
+          # There should not be any overlap between `shared_matrix` and `ingest_matrix`
+          # but deduplicating with `unique` just in case.
+          # -Jover, 14 Apr 2026.
+          matrix=$(jq -n \
+            --argjson matrix1 "$shared_matrix" \
+            --argjson matrix2 "$ingest_matrix" \
+            -c '$matrix1 + $matrix2 | sort_by(.repo, .path) | unique')
+
           echo "downstream-matrix=$matrix" | tee -a "$GITHUB_OUTPUT"
   update-downstream:
     name: update-downstream (${{ matrix.repo }}, ${{ matrix.path }})

From ab5399ff92062fc4b97eb2a49ca1737ee924f59b Mon Sep 17 00:00:00 2001
From: Jover Lee
Date: Wed, 15 Apr 2026 14:02:51 -0700
Subject: [PATCH 3/3] .github/update-downstream-repos: dedup matrix by repo

We should only be keeping a single copy of the vendored repo in each
downstream repo, so deduplicate the matrix by repo. In cases where
there are multiple copies, we are prioritizing the `nextstrain/shared`
remote since that is the newer version.

This is prompted by the error in the workflow when avian-flu had two
paths to update.
---
 .github/workflows/update-downstream-repos.yaml | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/update-downstream-repos.yaml b/.github/workflows/update-downstream-repos.yaml
index 0fea325..604ff91 100644
--- a/.github/workflows/update-downstream-repos.yaml
+++ b/.github/workflows/update-downstream-repos.yaml
@@ -50,13 +50,15 @@ jobs:
               })
             ')
 
-          # There should not be any overlap between `shared_matrix` and `ingest_matrix`
-          # but deduplicating with `unique` just in case.
-          # -Jover, 14 Apr 2026.
+          # Deduplicate by repo since each repo should only have a single copy
+          # of the vendored repo. In cases where a repo has both,
+          # we are prioritizing the nextstrain/shared remote since that is
+          # the newer repo.
+          # -Jover, 15 Apr 2026.
           matrix=$(jq -n \
             --argjson matrix1 "$shared_matrix" \
             --argjson matrix2 "$ingest_matrix" \
-            -c '$matrix1 + $matrix2 | sort_by(.repo, .path) | unique')
+            -c '$matrix1 + $matrix2 | unique_by(.repo)')
 
           echo "downstream-matrix=$matrix" | tee -a "$GITHUB_OUTPUT"
   update-downstream: