Skip to content

Commit

Permalink
uplifts: query differential.querydiffs to find the latest non-`comm…
Browse files Browse the repository at this point in the history
…it` diff (Bug 1709608) (#364)

Extend `get_uplift_conduit_state` to also query `differential.querydiffs`
and return all known diffs associated with the selected uplift
revisions. When creating the uplift revisions, add a function to
parse through the diffs and return the latest diff that does not
have a `creationMethod` of `commit`. Then find the associated diff
response from `differential.diff.search` and pass both along to the
uplift revision creation function. When parsing binary from the
`getrawdiff` API response, re-use the metadata from the `querydiffs`
response so the pre-uploaded binary file is re-used in the uplift revision.
  • Loading branch information
cgsheeh committed Dec 14, 2023
1 parent 15f615a commit 550bf94
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 12 deletions.
21 changes: 17 additions & 4 deletions landoapi/api/uplift.py
Expand Up @@ -13,6 +13,8 @@
from landoapi.repos import get_repos_for_env
from landoapi.uplift import (
create_uplift_revision,
get_diff_info_if_missing,
get_latest_non_commit_diff,
get_local_uplift_repo,
get_uplift_conduit_state,
get_uplift_repositories,
Expand Down Expand Up @@ -67,7 +69,12 @@ def create(phab: PhabricatorClient, data: dict):
"target_repository": repo_name,
},
)
revision_data, revision_stack, target_repository = get_uplift_conduit_state(
(
revision_data,
revision_stack,
target_repository,
rev_ids_to_all_diffs,
) = get_uplift_conduit_state(
phab,
revision_id=revision_id,
target_repository_name=repo_name,
Expand Down Expand Up @@ -102,9 +109,14 @@ def create(phab: PhabricatorClient, data: dict):
# Get the revision.
revision = revision_data.revisions[phid]

# Get the relevant diff.
diff_phid = phab.expect(revision, "fields", "diffPHID")
diff = revision_data.diffs[diff_phid]
# Get the relevant diff in the `differential.querydiffs` response format.
querydiffs_diff = get_latest_non_commit_diff(
rev_ids_to_all_diffs[revision["id"]]
)
# Find the same diff but in the `differential.diff.search` response format.
diff = get_diff_info_if_missing(
phab, int(querydiffs_diff["id"]), revision_data.diffs.values()
)

# Get the parent commit PHID from the stack if available.
parent_phid = commit_stack[-1]["revision_phid"] if commit_stack else None
Expand All @@ -116,6 +128,7 @@ def create(phab: PhabricatorClient, data: dict):
local_repo,
revision,
diff,
querydiffs_diff,
parent_phid,
base_revision,
target_repository,
Expand Down
31 changes: 27 additions & 4 deletions landoapi/phabricator_patch.py
Expand Up @@ -94,11 +94,30 @@ def unix_file_mode(value: int) -> str:
return "{:06o}".format(value)


def serialize_patched_file(diff: dict, public_node: str) -> dict:
def serialize_patched_file(querydiffs_diff: dict, diff: dict, public_node: str) -> dict:
"""Convert a patch diff from `rs-parsepatch` format to Phabricator format."""
# Detect binary or test (not images)
metadata = {}
if diff["binary"] is True:
if diff["binary"] is True and querydiffs_diff:
file_type = FileType.BINARY
# Search the list of changes in the `differential.querydiffs` response for the
# change the corresponds to this file.
changes = [
change
for change in querydiffs_diff["changes"]
if change["currentPath"] == diff["filename"]
]

# There should only be one change that corresponds to this file.
if not changes or len(changes) > 1:
raise Exception("Found more than one change for this diff.")

# Use the metadata from the original diff for this new diff, since that diff
# will already have uploaded the file as binary and will have a PHID that can be
# used for reference.
metadata = changes[0]["metadata"]

elif diff["binary"] is True:
# We cannot detect the mime type from a file in the patch
# So no support for image file type
file_type = FileType.BINARY
Expand Down Expand Up @@ -156,7 +175,11 @@ def serialize_patched_file(diff: dict, public_node: str) -> dict:
}


def patch_to_changes(patch_content: str, public_node: str) -> list[dict]:
def patch_to_changes(
querydiffs_diff: dict, patch_content: str, public_node: str
) -> list[dict]:
"""Build a list of Phabricator changes from a raw diff"""
patch = rs_parsepatch.get_diffs(patch_content, hunks=True)
return [serialize_patched_file(diff, public_node) for diff in patch]
return [
serialize_patched_file(querydiffs_diff, diff, public_node) for diff in patch
]
67 changes: 64 additions & 3 deletions landoapi/uplift.py
Expand Up @@ -2,10 +2,12 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import collections
import json
import logging
import re
import time
from operator import itemgetter
from typing import (
Any,
Optional,
Expand Down Expand Up @@ -95,9 +97,29 @@ def get_revisions_without_bugs(phab: PhabricatorClient, revisions: dict) -> set[
return missing_bugs


def get_rev_ids_to_diffs(
phab: PhabricatorClient, rev_ids: list[int]
) -> dict[int, list[dict]]:
"""Given the list of revision ids, return a mapping of IDs to all associated diffs."""
# Query all diffs for the revisions with `differential.querydiffs`.
querydiffs_response = phab.call_conduit(
"differential.querydiffs", revisionIDs=rev_ids
)

if not querydiffs_response:
raise Exception(f"Could not find any diffs for revisions {rev_ids}.")

rev_ids_to_all_diffs = collections.defaultdict(list)
for diff in querydiffs_response.values():
rev_id = phab.expect(diff, "revisionID")
rev_ids_to_all_diffs[int(rev_id)].append(diff)

return rev_ids_to_all_diffs


def get_uplift_conduit_state(
phab: PhabricatorClient, revision_id: int, target_repository_name: str
) -> tuple[RevisionData, RevisionStack, dict]:
) -> tuple[RevisionData, RevisionStack, dict, dict]:
"""Queries Conduit for repository and stack information about the requested uplift.
Gathers information about:
Expand Down Expand Up @@ -143,9 +165,47 @@ def get_uplift_conduit_state(
f"Every uplifted patch must have an associated bug ID: {missing} do not."
)

rev_ids = [int(revision["id"]) for revision in stack_data.revisions.values()]
rev_ids_to_all_diffs = get_rev_ids_to_diffs(phab, rev_ids)

stack = RevisionStack(set(stack_data.revisions.keys()), edges)

return stack_data, stack, target_repo
return stack_data, stack, target_repo, rev_ids_to_all_diffs


def get_latest_non_commit_diff(diffs: list[dict]) -> dict:
"""Given a list of diff dicts, return the latest diff with a non-commit creation method.
Commits with a `creationMethod` of `commit` will have empty binary files, if any
are included in the commit. Avoid using them for creating uplift requests and instead
use the latest non-commit diff associated with the patch. See bug 1865760 for more.
"""
# Iterate through the diffs in order of the latest IDs.
for diff in sorted(diffs, key=itemgetter("id"), reverse=True):
# Diffs with a `creationMethod` of `commit` may have bad binary data.
if diff["creationMethod"] == "commit":
continue

return diff

raise Exception(f"Could not find an appropriate diff to return from {diffs}.")


def get_diff_info_if_missing(
phab: PhabricatorClient, diff_id: int, existing_diffs: list[dict]
) -> dict:
"""Check `existing_diffs` for a diff with `diff_id`, or query Conduit for the data."""
existing_diff = [diff for diff in existing_diffs if diff["id"] == diff_id]
if existing_diff:
return existing_diff[0]

diffs = phab.call_conduit(
"differential.diff.search",
constraints={"ids": [diff_id]},
attachments={"commits": True},
)

return phab.single(diffs, "data")


def get_local_uplift_repo(phab: PhabricatorClient, target_repository: dict) -> Repo:
Expand Down Expand Up @@ -190,6 +250,7 @@ def create_uplift_revision(
local_repo: Repo,
source_revision: dict,
source_diff: dict,
querydiffs_diff: dict,
parent_phid: Optional[str],
base_revision: str,
target_repository: dict,
Expand All @@ -215,7 +276,7 @@ def create_uplift_revision(
# Upload it on target repo.
new_diff = phab.call_conduit(
"differential.creatediff",
changes=patch_to_changes(raw_diff, head),
changes=patch_to_changes(querydiffs_diff, raw_diff, head),
sourceMachine=local_repo.url,
sourceControlSystem=phab.expect(target_repository, "fields", "vcs"),
sourceControlPath="/",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_phabricator_patch.py
Expand Up @@ -18,6 +18,6 @@ def test_patch_to_changes(patch_directory, patch_name):
patch_path = os.path.join(patch_directory, f"{patch_name}.diff")
result_path = os.path.join(patch_directory, f"{patch_name}.json")
with open(patch_path) as p:
output = patch_to_changes(p.read(), "deadbeef123")
output = patch_to_changes({}, p.read(), "deadbeef123")

assert output == json.load(open(result_path))
20 changes: 20 additions & 0 deletions tests/test_uplift.py
Expand Up @@ -16,6 +16,7 @@
from landoapi.uplift import (
add_original_revision_line_if_needed,
create_uplift_bug_update_payload,
get_latest_non_commit_diff,
get_revisions_without_bugs,
parse_milestone_version,
strip_depends_on_from_commit_message,
Expand Down Expand Up @@ -389,3 +390,22 @@ def test_get_revisions_without_bugs(phabdouble):
assert get_revisions_without_bugs(phab, revisions) == {
rev2["id"]
}, "Revision without associated bug should be returned."


def test_get_latest_non_commit_diff():
test_data = [
{"creationMethod": "commit", "id": 3},
{"creationMethod": "moz-phab-hg", "id": 1},
{"creationMethod": "commit", "id": 4},
{"creationMethod": "moz-phab-hg", "id": 2},
{"creationMethod": "commit", "id": 5},
]

diff = get_latest_non_commit_diff(test_data)

assert (
diff["id"] == 2
), "Returned diff should have the highest diff ID without `commit`."
assert (
diff["creationMethod"] != "commit"
), "Diffs with a `creationMethod` of `commit` should be skipped."

0 comments on commit 550bf94

Please sign in to comment.