Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

- n/a
- Searching for RPMs by sha256sum will now use the indexed `checksum` field on Pulp,
rather than the non-indexed `checksums.sha256` field. This can significantly improve
the performance of these searches on large systems.

## [2.12.1] - 2021-08-11

Expand Down
6 changes: 5 additions & 1 deletion pubtools/pulplib/_impl/model/unit/rpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,11 @@ class RpmUnit(Unit):
sha256sum = pulp_attrib(
default=None,
type=str,
pulp_field="checksums.sha256",
# Use the 'checksum' field because it is indexed, which makes searching on it
# much faster than searching on checksums.sha256.
# This is safe because 'checksum' is always stored as a copy of the sha256 checksum, see:
# https://github.com/pulp/pulp_rpm/blob/69759d0fb9a16c0a47b1f49c78f6712e650912e1/plugins/pulp_rpm/plugins/importers/yum/upload.py#L436
pulp_field="checksum",
converter=lambda s: s.lower() if s else s,
)
"""SHA256 checksum of this RPM, as a hex string."""
Expand Down
10 changes: 10 additions & 0 deletions pubtools/pulplib/_impl/schema/unit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,16 @@ definitions:
type: string
pattern: "^[a-f0-9]{64}$"

# SHA256 checksum.
# This duplicates checksums.sha256 above; the difference is that this field
# is a part of the unit key, so it's both mandatory & indexed.
# Also, though the original intent was probably to support multiple checksum
# types in this field, it is nowadays forced to sha256, see:
# https://github.com/pulp/pulp_rpm/blob/69759d0fb9a16c0a47b1f49c78f6712e650912e1/plugins/pulp_rpm/plugins/importers/yum/upload.py#L436
checksum:
type: string
pattern: "^[a-f0-9]{64}$"

repository_memberships:
type: array
items:
Expand Down
37 changes: 37 additions & 0 deletions tests/unit/test_rpm_sums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from pubtools.pulplib import Unit


def test_rpm_sums():
    """Each checksum attribute of an RPM unit is read from the expected Pulp field."""
    md5 = "aaa07a382ec010c01889250fce66fb13"
    sha1 = "bbb9ae4aeea6946a8668445395ba10b7399523a0"
    # The per-algorithm dict and the top-level "checksum" field would normally
    # hold the same sha256 value. They are deliberately made different here so
    # the assertions below reveal which of the two fields was actually read.
    nested_sha256 = "ccce93732fcf8d63fe1cce759664982dbd5b23161f007dba8561862adc96d063"
    top_level_sha256 = (
        "ddde93732fcf8d63fe1cce759664982dbd5b23161f007dba8561862adc96d063"
    )

    pulp_data = {
        "_content_type_id": "rpm",
        "name": "bash",
        "epoch": "0",
        "filename": "bash-x86_64.rpm",
        "version": "4.0",
        "release": "1",
        "arch": "x86_64",
        # One entry per checksum algorithm.
        "checksums": {
            "md5": md5,
            "sha1": sha1,
            "sha256": nested_sha256,
        },
        # Top-level field, which always carries a sha256 value.
        "checksum": top_level_sha256,
    }
    rpm = Unit.from_data(pulp_data)

    # md5 and sha1 are looked up in the "checksums" dict.
    assert rpm.md5sum == md5
    assert rpm.sha1sum == sha1

    # sha256 is taken from the top-level "checksum" field instead.
    assert rpm.sha256sum == top_level_sha256