Skip to content

Commit

Permalink
fix(io.default): Handle PermissionError during check (#177)
Browse files Browse the repository at this point in the history
A PermissionError while checking a file will now result in it
being considered corrupt.

Bonus: fixed some ham-fisted mocking in tests/io/test_lustrehsmnode.py
  which was not being properly cleaned up after the test finished.

Closes #176
  • Loading branch information
ketiltrout committed May 9, 2024
1 parent 2179699 commit 47557d2
Show file tree
Hide file tree
Showing 26 changed files with 133 additions and 32 deletions.
1 change: 1 addition & 0 deletions alpenhorn/auto_import.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Routines for the importing of new files on a node."""

from __future__ import annotations
from typing import TYPE_CHECKING

Expand Down
8 changes: 5 additions & 3 deletions alpenhorn/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,11 @@ def sync(
click.echo(
"Adding {} new requests{}.\n".format(
len(files_out) or "no",
", keeping {} already existing".format(len(files_in))
if len(files_in)
else "",
(
", keeping {} already existing".format(len(files_in))
if len(files_in)
else ""
),
)
)

Expand Down
1 change: 1 addition & 0 deletions alpenhorn/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
needing database access must separately call `connect()` to initialise the
database proxy.
"""

from __future__ import annotations
from typing import Any

Expand Down
1 change: 1 addition & 0 deletions alpenhorn/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
If other keys are present in the dictionary returned by `register_extension`, they
are ignored.
"""

from __future__ import annotations
from typing import TYPE_CHECKING, Tuple

Expand Down
1 change: 1 addition & 0 deletions alpenhorn/io/_default_asyncs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""DefaultIO asyncs (functions that run asynchronously in tasks)."""

from __future__ import annotations
from typing import TYPE_CHECKING

Expand Down
1 change: 1 addition & 0 deletions alpenhorn/io/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
something even remotely resembling a POSIX filesystem may be better served
by subclassing from DefaultIO instead of from here directly.
"""

from __future__ import annotations
from typing import TYPE_CHECKING, IO

Expand Down
16 changes: 12 additions & 4 deletions alpenhorn/io/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
explicitly specify `io_class` (as well as being used explicitly when `io_class`
has the value "Default").
"""

from __future__ import annotations
from typing import TYPE_CHECKING, IO

Expand Down Expand Up @@ -268,7 +269,7 @@ def locked(self, path: os.PathLike) -> bool:

return path.with_name("." + path.name + ".lock").exists()

def md5(self, path: str | pathlib.Path, *segments) -> str:
def md5(self, path: str | pathlib.Path, *segments) -> str | None:
"""Compute the MD5 hash of the file at the specified path.
This can take a long time: call it from an async.
Expand All @@ -283,10 +284,17 @@ def md5(self, path: str | pathlib.Path, *segments) -> str:
Returns
-------
md5sum : str
the base64-encoded MD5 hash value
md5sum : str | None
the base64-encoded MD5 hash value or None on error
"""
return util.md5sum_file(pathlib.Path(self.node.root, path, *segments))
path = pathlib.Path(self.node.root, path, *segments)
try:
return util.md5sum_file(path)
except FileNotFoundError:
log.warning(f"MD5 sum check for {path} failed: file not found.")
except PermissionError:
log.warning(f"MD5 sum check for {path} failed: permission error.")
return None

def open(self, path: os.PathLike | str, binary: bool = True) -> IO:
"""Open the file specified by `path` for reading.
Expand Down
1 change: 1 addition & 0 deletions alpenhorn/io/ioutil.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""I/O utility functions."""

from __future__ import annotations
from typing import TYPE_CHECKING

Expand Down
1 change: 1 addition & 0 deletions alpenhorn/io/lfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* `lfs hsm_release`
requests the state change `RESTORED -> RELEASED`
"""

from __future__ import annotations
from typing import TYPE_CHECKING

Expand Down
1 change: 1 addition & 0 deletions alpenhorn/io/lustrehsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
with a secondary StorageNode of a different class meant to store files
too small for the external system.
"""

from __future__ import annotations
from typing import TYPE_CHECKING, IO

Expand Down
1 change: 1 addition & 0 deletions alpenhorn/io/lustrequota.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
The quota target can either be the one reported by "lfs quota" directly
or else set to a fixed value via the `io_config`.
"""

from __future__ import annotations
from typing import TYPE_CHECKING

Expand Down
1 change: 1 addition & 0 deletions alpenhorn/io/polling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Use in situations where inotify won't work (like NFS mounts).
"""

from watchdog.observers.polling import PollingObserver

from .default import DefaultNodeIO
Expand Down
1 change: 1 addition & 0 deletions alpenhorn/io/transport.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Transport Group I/O."""

from __future__ import annotations
from typing import TYPE_CHECKING

Expand Down
1 change: 1 addition & 0 deletions alpenhorn/storage.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""StorageNode and StorageGroup table models."""

# Type annotation shennanigans
from __future__ import annotations
from typing import TYPE_CHECKING
Expand Down
1 change: 1 addition & 0 deletions alpenhorn/update.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Routines for updating the state of a node.
"""

from __future__ import annotations
from typing import TYPE_CHECKING

Expand Down
1 change: 1 addition & 0 deletions alpenhorn/util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Utility functions."""

from __future__ import annotations

import socket
Expand Down
1 change: 1 addition & 0 deletions examples/pattern_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
in an extended ArchiveAcq table and the matched FileType in an
extended ArchiveFile table.
"""

from __future__ import annotations
from typing import TYPE_CHECKING, Tuple

Expand Down
43 changes: 41 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Common fixtures"""

import os
import pytest
import shutil
Expand Down Expand Up @@ -314,7 +315,7 @@ def _mocked_stat(path):

from math import ceil

file = fs.get_object(path)
file = fs.get_object(path, check_read_perm=False)
size = file.size

# Anything with a __dict__ works here.
Expand All @@ -330,6 +331,44 @@ class Result:
yield


@pytest.fixture
def mock_exists(fs):
    """Mocks pathlib.PosixPath.exists to work with pyfakefs."""

    def _mocked_exists(path):
        """Mock of pathlib.PosixPath.exists to work better with pyfakefs.

        The problem here is if there's an unreadable file in a readable
        directory, pyfakefs raises PermissionError in pathlib.Path.exists,
        even though it should return True (since only the directory needs to
        be read for an existence check.)
        """
        try:
            dir_ = fs.get_object(path.parent)
        except FileNotFoundError:
            # No parent directory: the path can't exist
            return False

        # Parent directory not readable.  (Check the read bits, 0o444:
        # an existence check needs to read the directory, not write it.)
        if not dir_.st_mode & 0o444:
            raise PermissionError("Permission denied")

        # Directory is readable, so existence can be determined even if
        # the file itself is unreadable: only swallow PermissionError.
        try:
            fs.get_object(path)
        except PermissionError:
            pass
        except FileNotFoundError:
            return False

        return True

    with patch("pathlib.PosixPath.exists", _mocked_exists):
        yield


@pytest.fixture
def mock_observer():
"""Mocks the DefaultIO observer so its always the PollingObserver"""
Expand All @@ -340,7 +379,7 @@ def mock_observer():


@pytest.fixture
def xfs(fs, mock_observer, mock_statvfs, mock_stat):
def xfs(fs, mock_observer, mock_statvfs, mock_stat, mock_exists):
"""An extended pyfakefs.
Patches more stuff for proper behaviour with alpenhorn unittests"""
Expand Down
13 changes: 13 additions & 0 deletions tests/io/test_defaultnode.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Test DefaultNodeIO."""

import pytest
import pathlib

Expand Down Expand Up @@ -111,6 +112,18 @@ def test_md5(unode, xfs):
assert unode.io.md5("dir/file2") == "9e107d9d372bb6826bd81d3542a419d6"


def test_md5_bad(unode, xfs):
    """test DefaultNodeIO.md5() with bad files"""

    # Try a missing file: the FileNotFoundError is converted to None
    xfs.create_dir("/node/dir/")
    assert unode.io.md5("dir", "file1") is None

    # Try an unreadable file: the PermissionError is also converted to None
    # (use "is None", not "== None": PEP 8 E711, and matches the check above)
    xfs.create_file("/node/dir/file", st_mode=0)
    assert unode.io.md5("dir/file") is None


def test_open_absolute(unode, xfs):
"""Test DefaultNodeIO.open() on an absolute path."""

Expand Down
22 changes: 22 additions & 0 deletions tests/io/test_defaultnode_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,28 @@ def test_check_md5sum_bad(xfs, queue, simpleacq, archivefile, unode, archivefile
assert ArchiveFileCopy.get(file=file, node=unode.db).has_file == "X"


def test_check_md5sum_perm(xfs, queue, simpleacq, archivefile, unode, archivefilecopy):
    """Test check async with permission error."""

    bad_file = archivefile(name="file", acq=simpleacq, size_b=43, md5sum="incorrect-md5")
    bad_copy = archivefilecopy(file=bad_file, node=unode.db, has_file="M")

    # An existing file which we have no permission to read
    xfs.create_file(
        bad_copy.path, contents="The quick brown fox jumps over the lazy dog", st_mode=0
    )

    # Queue the check
    unode.io.check(bad_copy)

    # Run the queued task
    task, key = queue.get()
    task()
    queue.task_done(key)

    # The permission error during the check marks the copy corrupt.
    assert ArchiveFileCopy.get(file=bad_file, node=unode.db).has_file == "X"


def test_check_missing(xfs, queue, simpleacq, archivefile, unode, archivefilecopy):
"""Test check async with missing file."""

Expand Down
40 changes: 20 additions & 20 deletions tests/io/test_lustrehsmnode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

import pytest
import datetime
from unittest.mock import MagicMock
from unittest.mock import patch, MagicMock

from alpenhorn.archive import ArchiveFileCopy
from alpenhorn.io import lustrehsm
from alpenhorn.update import UpdateableNode


Expand Down Expand Up @@ -249,16 +248,15 @@ def test_auto_verify_missing(queue, node):
def test_auto_verify_restored(xfs, node):
"""Test auto_verification on a restored file."""

# Mock the Default IO's check function which we don't need to test here
from alpenhorn.io.default import DefaultNodeIO

DefaultNodeIO.check = MagicMock()
# When a file is restored, LustreHSM.auto_verify just tail-calls DefaultIO.check
check_mock = MagicMock()

xfs.create_file("/node/simpleacq/file1")
copy = ArchiveFileCopy.get(id=1)

node.io.auto_verify(copy)
DefaultNodeIO.check.assert_called_once_with(copy)
with patch("alpenhorn.io.default.DefaultNodeIO.check", check_mock):
node.io.auto_verify(copy)
check_mock.assert_called_once_with(copy)


@pytest.mark.lfs_hsm_state(
Expand All @@ -269,8 +267,8 @@ def test_auto_verify_restored(xfs, node):
def test_auto_verify_released(xfs, queue, mock_lfs, node):
"""Test auto_verification on a released file."""

# Mock the Default IO's check_async after it gets imported into lustrehsm.py
lustrehsm.check_async = MagicMock()
# Mock the check async, which is called by the task to do the heavy lifting
async_mock = MagicMock()

xfs.create_file("/node/simpleacq/file1")

Expand All @@ -283,11 +281,12 @@ def test_auto_verify_released(xfs, queue, mock_lfs, node):
assert queue.qsize == 1

# Run task
task, key = queue.get()
task()
queue.task_done(key)
with patch("alpenhorn.io.lustrehsm.check_async", async_mock):
task, key = queue.get()
task()
queue.task_done(key)

lustrehsm.check_async.assert_called_once()
async_mock.assert_called_once()

# File has been re-released
lfs = mock_lfs("")
Expand All @@ -302,8 +301,8 @@ def test_auto_verify_released(xfs, queue, mock_lfs, node):
def test_auto_verify_ready_released(xfs, queue, mock_lfs, node):
"""Test auto_verification on a released file that's ready."""

# Mock the Default IO's check_async after it gets imported into lustrehsm.py
lustrehsm.check_async = MagicMock()
# Mock the check async, which is called by the task to do the heavy lifting
async_mock = MagicMock()

xfs.create_file("/node/simpleacq/file1")

Expand All @@ -314,11 +313,12 @@ def test_auto_verify_ready_released(xfs, queue, mock_lfs, node):
assert queue.qsize == 1

# Run task
task, key = queue.get()
task()
queue.task_done(key)
with patch("alpenhorn.io.lustrehsm.check_async", async_mock):
task, key = queue.get()
task()
queue.task_done(key)

lustrehsm.check_async.assert_called_once()
async_mock.assert_called_once()

# File has _not_ been re-released
lfs = mock_lfs("")
Expand Down
4 changes: 1 addition & 3 deletions tests/io/test_transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,7 @@ def transport_fleet_no_init(xfs, hostname, queue, storagegroup, storagenode):
node.io.pull = MagicMock(return_value=None)
# mock bytes_avail to simply return avail_gb to avoid having to mess about with pyfakefs
node.io.bytes_avail = MagicMock(
return_value=None
if node.db.avail_gb is None
else node.db.avail_gb * 2**30
return_value=None if node.db.avail_gb is None else node.db.avail_gb * 2**30
)
xfs.create_dir(node.db.root)

Expand Down
1 change: 1 addition & 0 deletions tests/test_extensions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""test alpenhorn.extensions."""

import pytest

from unittest.mock import patch, MagicMock
Expand Down
1 change: 1 addition & 0 deletions tests/test_logger.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Test alpenhorn.logger"""

import pytest
import socket
import logging
Expand Down
Loading

0 comments on commit 47557d2

Please sign in to comment.