fix incorrect hash type result in s3fs
The s3fs backend is currently computing the wrong hash type. The
default hashing algorithm was changed from md5 to sha256, but the s3fs
backend calls the hashing function without specifying an algorithm and
then hardcodes the result's hash type to md5. After the change to
sha256, it was therefore computing a sha256 sum while reporting it as
md5. In turn, it would compute the sha256 sum of the existing target
file and compare it to the cached file's md5, which would obviously
always differ.

This causes downstream effects, such as test mode always reporting a
change, even when there isn't one.

Fixes #65589
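
To make the failure mode concrete, here is a minimal standalone sketch of the
before/after behavior. It is illustrative only: it uses hashlib in place of
salt.utils.hashutils.get_hash, and the function names are made up rather than
Salt's.

import hashlib


def broken_file_hash(cached_file_path):
    # get_hash() was called without an algorithm, so when the default flipped
    # to sha256 this silently became a sha256 digest...
    with open(cached_file_path, "rb") as f:
        hsum = hashlib.sha256(f.read()).hexdigest()
    # ...but the result was still labeled as md5.
    return {"hsum": hsum, "hash_type": "md5"}


def fixed_file_hash(cached_file_path, hash_type="md5"):
    # The fix pins one algorithm (S3_HASH_TYPE in s3fs) and uses it for both
    # the digest and the reported hash_type, so the two can never disagree.
    with open(cached_file_path, "rb") as f:
        hsum = hashlib.new(hash_type, f.read()).hexdigest()
    return {"hsum": hsum, "hash_type": hash_type}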
dead10ck committed Dec 6, 2023
1 parent d0f91d9 commit d436773
Showing 3 changed files with 148 additions and 9 deletions.
1 change: 1 addition & 0 deletions changelog/65589.fixed.md
@@ -0,0 +1 @@
+Fixes the s3fs backend hashing the local cache's files with the wrong hash type
11 changes: 7 additions & 4 deletions salt/fileserver/s3fs.py
@@ -96,6 +96,7 @@

 S3_CACHE_EXPIRE = 30  # cache for 30 seconds
 S3_SYNC_ON_UPDATE = True  # sync cache on update rather than jit
+S3_HASH_TYPE = "md5"


 def envs():
@@ -181,7 +182,7 @@ def find_file(path, saltenv="base", **kwargs):

 def file_hash(load, fnd):
     """
-    Return an MD5 file hash
+    Return the hash of an object's cached copy
     """
     if "env" in load:
         # "env" is not supported; Use "saltenv".
@@ -200,8 +201,8 @@ def file_hash(load, fnd):
     )

     if os.path.isfile(cached_file_path):
-        ret["hsum"] = salt.utils.hashutils.get_hash(cached_file_path)
-        ret["hash_type"] = "md5"
+        ret["hash_type"] = S3_HASH_TYPE
+        ret["hsum"] = salt.utils.hashutils.get_hash(cached_file_path, S3_HASH_TYPE)

     return ret

@@ -708,7 +709,9 @@ def _get_file_from_s3(metadata, saltenv, bucket_name, path, cached_file_path):

         if file_etag.find("-") == -1:
             file_md5 = file_etag
-            cached_md5 = salt.utils.hashutils.get_hash(cached_file_path, "md5")
+            cached_md5 = salt.utils.hashutils.get_hash(
+                cached_file_path, S3_HASH_TYPE
+            )

             # hashes match we have a cache hit
             if cached_md5 == file_md5:
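
Background on the ETag guard in the hunk above (context, not part of the
commit): for single-part uploads S3 exposes the object's MD5 digest as the
ETag, while multipart ETags contain a "-" and are not a plain digest. That is
why the find("-") == -1 check gates the comparison, and why comparing against
an md5 of the cached file is a valid cache-hit test there. A tiny illustrative
helper under that assumption:

def etag_is_plain_md5(etag: str) -> bool:
    # Single-part uploads: ETag == md5 of the object body.
    # Multipart uploads: ETag looks like "<digest>-<part count>" and cannot be
    # compared directly to a local md5.
    return "-" not in etag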
145 changes: 140 additions & 5 deletions tests/pytests/unit/fileserver/test_s3fs.py
@@ -1,19 +1,154 @@
 import os

+import boto3
 import pytest
+import yaml

+# moto must be imported before boto3
+from moto import mock_s3

 import salt.fileserver.s3fs as s3fs
+import salt.utils.s3
+from tests.support.mock import patch


+@pytest.fixture(scope="module")
+def bucket():
+    return "mybucket"


+@pytest.fixture(scope="module")
+def aws_creds():
+    return {
+        "aws_access_key_id": "testing",
+        "aws_secret_access_key": "testing",
+        "aws_session_token": "testing",
+        "region_name": "us-east-1",
+    }


-@pytest.fixture
-def configure_loader_modules(tmp_path):
+@pytest.fixture(scope="function")
+def configure_loader_modules(tmp_path, bucket):
     opts = {
         "cachedir": tmp_path,
+        "s3.buckets": {"base": [bucket]},
+        "s3.location": "us-east-1",
     }
-    return {s3fs: {"__opts__": opts}}
+    utils = {"s3.query": salt.utils.s3.query}

+    return {s3fs: {"__opts__": opts, "__utils__": utils}}


+# these can't be fixtures, as the moto docs suggest, since they would
+# introduce a separate mock scope from the s3fs module's scope
+@pytest.fixture(scope="function")
+def s3(bucket, aws_creds):
+    def _s3():
+        conn = boto3.client("s3", **aws_creds)
+        conn.create_bucket(Bucket=bucket)
+        return conn

+    return _s3


+def make_keys(bucket, conn, keys):
+    for key, data in keys.items():
+        conn.put_object(
+            Bucket=bucket,
+            Key=key,
+            Body=data["content"],
+        )


+def verify_cache(bucket, expected):
+    for key, data in expected.items():
+        correct_content = data["content"]
+        cache_file = s3fs._get_cached_file_name(bucket, "base", key)
+        assert os.path.exists(cache_file)

+        if correct_content is None:
+            continue

+        with salt.utils.files.fopen(cache_file) as f:
+            content = f.read()
+            assert correct_content == content


+# [HACK] to always invalidate the cache and re-download on every update
+@pytest.fixture(scope="module", autouse=True)
+def _patch_cache_expire():
+    with patch("salt.fileserver.s3fs.S3_CACHE_EXPIRE", -1):
+        yield


+@mock_s3
+def test_update(bucket, s3):
+    """Tests that files get downloaded from s3 to the local cache."""

+    keys = {
+        "top.sls": {"content": yaml.dump({"base": {"*": ["foo"]}})},
+        "foo.sls": {"content": yaml.dump({"nginx": {"pkg.installed": []}})},
+        "files/nginx.conf": {"content": "server {}"},
+    }

+    conn = s3()
+    make_keys(bucket, conn, keys)
+    s3fs.update()
+    verify_cache(bucket, keys)

+    # make a modification and update again - verify the change is retrieved
+    keys["top.sls"]["content"] = yaml.dump({"base": {"*": ["foo", "bar"]}})
+    make_keys(bucket, conn, keys)
+    s3fs.update()
+    verify_cache(bucket, keys)

+    # TODO: fix

+    # delete_file = "files/nginx.conf"
+    # del keys[delete_file]
+    # conn.delete_object(Bucket=bucket, Key=delete_file)

+    # s3fs.update()
+    # verify_cache(bucket, keys)

+    # cache_file = s3fs._get_cached_file_name(bucket, "base", delete_file)
+    # assert not os.path.exists(cache_file)


+@mock_s3
+def test_s3_hash(bucket, s3):
+    """Verifies that s3fs hashes files correctly."""

+    keys = {
+        "top.sls": {"content": yaml.dump({"base": {"*": ["foo"]}})},
+        "foo.sls": {"content": yaml.dump({"nginx": {"pkg.installed": []}})},
+        "files/nginx.conf": {"content": "server {}"},
+    }

+    conn = s3()
+    make_keys(bucket, conn, keys)
+    s3fs.update()

+    for key, item in keys.items():
+        cached_file_path = s3fs._get_cached_file_name(bucket, "base", key)
+        item["hash"] = salt.utils.hashutils.get_hash(
+            cached_file_path, s3fs.S3_HASH_TYPE
+        )
+        item["cached_file_path"] = cached_file_path

+    load = {"saltenv": "base"}
+    fnd = {"bucket": bucket}

+    for key, item in keys.items():
+        fnd["path"] = item["cached_file_path"]
+        actual_hash = s3fs.file_hash(load, fnd)
+        assert s3fs.S3_HASH_TYPE == actual_hash["hash_type"]
+        assert item["hash"] == actual_hash["hsum"]


-def test_cache_round_trip():
+def test_cache_round_trip(bucket):
     metadata = {"foo": "bar"}
-    cache_file = s3fs._get_cached_file_name("base", "fake_bucket", "some_file")
+    cache_file = s3fs._get_cached_file_name(bucket, "base", "somefile")

     s3fs._write_buckets_cache_file(metadata, cache_file)
     assert s3fs._read_buckets_cache_file(cache_file) == metadata
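
As an aside, a minimal sketch of the pattern the s3 fixture above relies on
(illustrative only, not part of the commit; the test name and literals are
made up): the boto3 client has to be created inside the @mock_s3-decorated
test so it talks to the mock that the test activates, which is why the fixture
returns a factory instead of a ready-made connection.

import boto3
from moto import mock_s3


@mock_s3
def test_mock_scope_sketch():
    # The client is created inside the mocked scope, mirroring conn = s3()
    # in the tests above; fake credentials keep botocore's request signing happy.
    conn = boto3.client(
        "s3",
        region_name="us-east-1",
        aws_access_key_id="testing",
        aws_secret_access_key="testing",
    )
    conn.create_bucket(Bucket="mybucket")
    conn.put_object(Bucket="mybucket", Key="k", Body=b"v")
    assert conn.get_object(Bucket="mybucket", Key="k")["Body"].read() == b"v"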
