Skip to content

Commit

Permalink
Migrate test models to reusable test fixtures (#218)
Browse files Browse the repository at this point in the history
Since we will be writing several different test routines, it is better for
all of them to use the same test "models", so let's share the fixture code.

Signed-off-by: Mihai Maruseac <mihaimaruseac@google.com>
  • Loading branch information
mihaimaruseac committed Jun 21, 2024
1 parent 6808ad9 commit 96e52b4
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 75 deletions.
104 changes: 29 additions & 75 deletions model_signing/serialization/dfs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,69 +19,11 @@
from model_signing.hashing import file
from model_signing.hashing import memory
from model_signing.serialization import dfs
from model_signing.serialization import fixtures_constants


# some constants used throughout testing
# Content written to the model file created by the `sample_model_file` fixture.
_KNOWN_MODEL_TEXT: bytes = b"This is a simple model"
# Different content, written over a model file to check that digests change.
_ANOTHER_MODEL_TEXT: bytes = b"This is another simple model"


# Note: Keep these fixtures function-scoped (the default). Don't widen them to
# module or session scope, as tests alter the models they receive!
@pytest.fixture
def sample_model_file(tmp_path_factory):
    """Create a model made of a single file with known content.

    The file is named "file", lives in a directory obtained from
    `tmp_path_factory.mktemp("model")` and contains `_KNOWN_MODEL_TEXT`.

    Returns:
        The path to the newly written file.
    """
    model_dir = tmp_path_factory.mktemp("model")
    model_path = model_dir / "file"
    model_path.write_bytes(_KNOWN_MODEL_TEXT)
    return model_path


@pytest.fixture
def empty_model_file(tmp_path_factory):
    """Create a model made of a single file with no content.

    The file is named "file" and lives in a directory obtained from
    `tmp_path_factory.mktemp("model")`.

    Returns:
        The path to the newly written, zero-byte file.
    """
    model_dir = tmp_path_factory.mktemp("model")
    model_path = model_dir / "file"
    model_path.write_bytes(b"")
    return model_path


@pytest.fixture
def sample_model_folder(tmp_path_factory):
    """Create a model made of a small directory tree with known content.

    The layout under the returned "root" directory is:
      - directories `d0` and `d1`, each holding files `f{i}0` .. `f{i}2`
        (where `i` is the directory index);
      - files `f0` .. `f3` directly under the root.

    Returns:
        The path to the root directory of the model.
    """
    base_dir = tmp_path_factory.mktemp("model")
    top = base_dir / "root"
    top.mkdir()

    # Subdirectories (and their files) are created before the root files.
    for i in range(2):
        subdir = top / f"d{i}"
        subdir.mkdir()
        for j in range(3):
            nested = subdir / f"f{i}{j}"
            nested.write_text(f"This is file f{i}{j} in d{i}.")

    for i in range(4):
        top_level = top / f"f{i}"
        top_level.write_text(f"This is file f{i} in root.")

    return top


@pytest.fixture
def empty_model_folder(tmp_path_factory):
    """Create a model made of a single directory with nothing inside.

    Returns:
        The path to the empty "root" directory, created inside a directory
        obtained from `tmp_path_factory.mktemp("model")`.
    """
    base_dir = tmp_path_factory.mktemp("model")
    top = base_dir / "root"
    top.mkdir()
    return top


@pytest.fixture
def deep_model_folder(tmp_path_factory):
    """Create a model whose files all sit at the bottom of a deep hierarchy.

    Under the returned "root" directory there is a single chain of nested
    directories `d0/d1/d2/d3/d4`; the innermost one holds files `f0` .. `f3`.

    Returns:
        The path to the root directory of the model.
    """
    base_dir = tmp_path_factory.mktemp("model")
    top = base_dir / "root"
    top.mkdir()

    # Descend one level at a time, creating each directory on the way down.
    deepest = top
    for i in range(5):
        deepest = deepest / f"d{i}"
        deepest.mkdir()

    # Only the innermost directory contains files.
    for i in range(4):
        leaf = deepest / f"f{i}"
        leaf.write_text(f"This is file f{i}.")

    return top
# Load fixtures from serialization/fixtures.py
pytest_plugins = ("model_signing.serialization.fixtures",)


class TestDFSSerializer:
Expand All @@ -99,7 +41,7 @@ def test_file_hash_is_same_as_hash_of_content(self, sample_model_file):
file_hasher = file.FileHasher("unused", memory.SHA256())
serializer = dfs.DFSSerializer(file_hasher, memory.SHA256)
manifest = serializer.serialize(sample_model_file)
digest = memory.SHA256(_KNOWN_MODEL_TEXT).compute()
digest = memory.SHA256(fixtures_constants.KNOWN_MODEL_TEXT).compute()
assert manifest.digest.digest_hex == digest.digest_hex

def test_file_model_hash_is_same_if_model_is_moved(self, sample_model_file):
Expand All @@ -120,7 +62,7 @@ def test_file_model_hash_changes_if_content_changes(
serializer = dfs.DFSSerializer(file_hasher, memory.SHA256)
manifest = serializer.serialize(sample_model_file)

sample_model_file.write_bytes(_ANOTHER_MODEL_TEXT)
sample_model_file.write_bytes(fixtures_constants.ANOTHER_MODEL_TEXT)
new_manifest = serializer.serialize(sample_model_file)

assert manifest.digest.algorithm == new_manifest.digest.algorithm
Expand All @@ -138,7 +80,7 @@ def test_directory_model_with_only_known_file(self, sample_model_file):
)
assert manifest.digest.digest_hex == expected

digest = memory.SHA256(_KNOWN_MODEL_TEXT).compute()
digest = memory.SHA256(fixtures_constants.KNOWN_MODEL_TEXT).compute()
assert manifest.digest.digest_hex != digest.digest_hex

def test_known_folder(self, sample_model_folder):
Expand Down Expand Up @@ -287,7 +229,7 @@ def test_folder_model_change_file(self, sample_model_folder):
# Alter first file in the altered_dir
files = [f for f in altered_dir.iterdir() if f.is_file()]
file_to_change = files[0]
file_to_change.write_bytes(_KNOWN_MODEL_TEXT)
file_to_change.write_bytes(fixtures_constants.KNOWN_MODEL_TEXT)

manifest2 = serializer.serialize(sample_model_folder)
assert manifest1.digest != manifest2.digest
Expand All @@ -313,7 +255,7 @@ def test_special_file(self, sample_model_folder):
os.mkfifo(pipe)
except AttributeError:
# On Windows, `os.mkfifo` does not exist (it should not).
return
return # trivially pass the test

file_hasher = file.FileHasher("unused", memory.SHA256())
serializer = dfs.DFSSerializer(file_hasher, memory.SHA256)
Expand All @@ -323,7 +265,7 @@ def test_special_file(self, sample_model_folder):
):
serializer.serialize(sample_model_folder)

# Also to the same for the pipe itself
# Also do the same for the pipe itself
with pytest.raises(
ValueError, match="Cannot use .* as file or directory"
):
Expand All @@ -350,7 +292,9 @@ def test_known_file(self, sample_model_file):
serializer = dfs.ShardedDFSSerializer(
self._hasher_factory, memory.SHA256()
)

manifest = serializer.serialize(sample_model_file)

expected = (
"2ca48c47d5311a9b2f9305519cd5f927dcef09404fc32ef7886abe8f11450eff"
)
Expand All @@ -360,8 +304,10 @@ def test_file_hash_is_not_same_as_hash_of_content(self, sample_model_file):
serializer = dfs.ShardedDFSSerializer(
self._hasher_factory, memory.SHA256()
)

manifest = serializer.serialize(sample_model_file)
digest = memory.SHA256(_KNOWN_MODEL_TEXT).compute()

digest = memory.SHA256(fixtures_constants.KNOWN_MODEL_TEXT).compute()
assert manifest.digest.digest_hex != digest.digest_hex

def test_file_model_hash_is_same_if_model_is_moved(self, sample_model_file):
Expand All @@ -384,7 +330,7 @@ def test_file_model_hash_changes_if_content_changes(
)
manifest = serializer.serialize(sample_model_file)

sample_model_file.write_bytes(_ANOTHER_MODEL_TEXT)
sample_model_file.write_bytes(fixtures_constants.ANOTHER_MODEL_TEXT)
new_manifest = serializer.serialize(sample_model_file)

assert manifest.digest.algorithm == new_manifest.digest.algorithm
Expand All @@ -402,15 +348,16 @@ def test_directory_model_with_only_known_file(self, sample_model_file):
"c030412c4c9e7f46396b591b1b6c4a4e40c15d9b9ca0b3512af8b20f3219c07f"
)
assert manifest.digest.digest_hex == expected

digest = memory.SHA256(_KNOWN_MODEL_TEXT).compute()
digest = memory.SHA256(fixtures_constants.KNOWN_MODEL_TEXT).compute()
assert manifest.digest.digest_hex != digest.digest_hex

def test_known_folder(self, sample_model_folder):
serializer = dfs.ShardedDFSSerializer(
self._hasher_factory, memory.SHA256()
)

manifest = serializer.serialize(sample_model_folder)

expected = (
"d22e0441cfa5ac2bc09715ddd88c802a7f97e29c93dc50f5498bab2954958ebb"
)
Expand All @@ -434,7 +381,9 @@ def test_empty_file(self, empty_model_file):
serializer = dfs.ShardedDFSSerializer(
self._hasher_factory, memory.SHA256()
)

manifest = serializer.serialize(empty_model_file)

expected = (
"5f2d126b0d3540c17481fdf724e31cf03b4436a2ebabaa1d2e94fe09831be64d"
)
Expand All @@ -444,9 +393,10 @@ def test_directory_model_with_only_empty_file(self, empty_model_file):
serializer = dfs.ShardedDFSSerializer(
self._hasher_factory, memory.SHA256()
)
manifest = serializer.serialize(empty_model_file)
model = empty_model_file.parent

manifest = serializer.serialize(model)

expected = (
"74e81d0062f0a0674014c2f0e4b79985d5015f98a64089e7106a44d32e9ff11f"
)
Expand All @@ -456,7 +406,9 @@ def test_empty_folder(self, empty_model_folder):
serializer = dfs.ShardedDFSSerializer(
self._hasher_factory, memory.SHA256()
)

manifest = serializer.serialize(empty_model_folder)

expected = (
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
)
Expand Down Expand Up @@ -551,7 +503,7 @@ def test_folder_model_change_file(self, sample_model_folder):
# Alter first file in the altered_dir
files = [f for f in altered_dir.iterdir() if f.is_file()]
file_to_change = files[0]
file_to_change.write_bytes(_KNOWN_MODEL_TEXT)
file_to_change.write_bytes(fixtures_constants.KNOWN_MODEL_TEXT)

manifest2 = serializer.serialize(sample_model_folder)
assert manifest1.digest != manifest2.digest
Expand All @@ -560,7 +512,9 @@ def test_deep_folder(self, deep_model_folder):
serializer = dfs.ShardedDFSSerializer(
self._hasher_factory, memory.SHA256()
)

manifest = serializer.serialize(deep_model_folder)

expected = (
"52fa3c459aec58bc5f9702c73cb3c6b8fd19e9342aa3e4db851e1bde69ab1727"
)
Expand Down Expand Up @@ -604,7 +558,7 @@ def test_special_file(self, sample_model_folder):
os.mkfifo(pipe)
except AttributeError:
# On Windows, `os.mkfifo` does not exist (it should not).
return
return # trivially pass the test

serializer = dfs.ShardedDFSSerializer(
self._hasher_factory, memory.SHA256()
Expand All @@ -615,7 +569,7 @@ def test_special_file(self, sample_model_folder):
):
serializer.serialize(sample_model_folder)

# Also to the same for the pipe itself
# Also do the same for the pipe itself
with pytest.raises(
ValueError, match="Cannot use .* as file or directory"
):
Expand Down
77 changes: 77 additions & 0 deletions model_signing/serialization/fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright 2024 The Sigstore Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Test fixtures to share between tests. Not part of the public API."""

import pytest

from model_signing.serialization import fixtures_constants


# Note: Keep these fixtures function-scoped (the default). Don't widen them to
# module or session scope, as tests alter the models they receive!
@pytest.fixture
def sample_model_file(tmp_path_factory):
    """Create a model made of a single file with known content.

    The file is named "file", lives in a directory obtained from
    `tmp_path_factory.mktemp("model")` and contains
    `fixtures_constants.KNOWN_MODEL_TEXT`.

    Returns:
        The path to the newly written file.
    """
    model_dir = tmp_path_factory.mktemp("model")
    model_path = model_dir / "file"
    model_path.write_bytes(fixtures_constants.KNOWN_MODEL_TEXT)
    return model_path


@pytest.fixture
def empty_model_file(tmp_path_factory):
    """Create a model made of a single file with no content.

    The file is named "file" and lives in a directory obtained from
    `tmp_path_factory.mktemp("model")`.

    Returns:
        The path to the newly written, zero-byte file.
    """
    model_dir = tmp_path_factory.mktemp("model")
    model_path = model_dir / "file"
    model_path.write_bytes(b"")
    return model_path


@pytest.fixture
def sample_model_folder(tmp_path_factory):
    """Create a model made of a small directory tree with known content.

    The layout under the returned "root" directory is:
      - directories `d0` and `d1`, each holding files `f{i}0` .. `f{i}2`
        (where `i` is the directory index);
      - files `f0` .. `f3` directly under the root.

    Returns:
        The path to the root directory of the model.
    """
    base_dir = tmp_path_factory.mktemp("model")
    top = base_dir / "root"
    top.mkdir()

    # Subdirectories (and their files) are created before the root files.
    for i in range(2):
        subdir = top / f"d{i}"
        subdir.mkdir()
        for j in range(3):
            nested = subdir / f"f{i}{j}"
            nested.write_text(f"This is file f{i}{j} in d{i}.")

    for i in range(4):
        top_level = top / f"f{i}"
        top_level.write_text(f"This is file f{i} in root.")

    return top


@pytest.fixture
def empty_model_folder(tmp_path_factory):
    """Create a model made of a single directory with nothing inside.

    Returns:
        The path to the empty "root" directory, created inside a directory
        obtained from `tmp_path_factory.mktemp("model")`.
    """
    base_dir = tmp_path_factory.mktemp("model")
    top = base_dir / "root"
    top.mkdir()
    return top


@pytest.fixture
def deep_model_folder(tmp_path_factory):
    """Create a model whose files all sit at the bottom of a deep hierarchy.

    Under the returned "root" directory there is a single chain of nested
    directories `d0/d1/d2/d3/d4`; the innermost one holds files `f0` .. `f3`.

    Returns:
        The path to the root directory of the model.
    """
    base_dir = tmp_path_factory.mktemp("model")
    top = base_dir / "root"
    top.mkdir()

    # Descend one level at a time, creating each directory on the way down.
    deepest = top
    for i in range(5):
        deepest = deepest / f"d{i}"
        deepest.mkdir()

    # Only the innermost directory contains files.
    for i in range(4):
        leaf = deepest / f"f{i}"
        leaf.write_text(f"This is file f{i}.")

    return top
18 changes: 18 additions & 0 deletions model_signing/serialization/fixtures_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2024 The Sigstore Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Constants used in test fixtures and tests. Not part of the public API."""

# Content written to the model file created by the `sample_model_file` fixture;
# tests also hash it directly to compare against serialized digests.
KNOWN_MODEL_TEXT: bytes = b"This is a simple model"
# Different content, written over a model file to check that digests change.
ANOTHER_MODEL_TEXT: bytes = b"This is another simple model"

0 comments on commit 96e52b4

Please sign in to comment.