Skip to content

Commit

Permalink
Add command to compute layer hash
Browse files Browse the repository at this point in the history
  • Loading branch information
Viktor Dick committed Feb 26, 2024
1 parent 2fd37e8 commit dc6253d
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 7 deletions.
21 changes: 21 additions & 0 deletions perfact/zodbsync/commands/layer_hash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env python

from ..subcommand import SubCommand
from ..helpers import hashdir


class LayerHash(SubCommand):
    """Compute hashes for the contents of a layer"""
    # Name under which this subcommand is selected on the command line.
    subcommand = 'layer-hash'
    # This command only reads a folder from disk, so it opts out of loading
    # a configuration and of connecting to the ZODB.
    use_config = False
    connect = False

    @staticmethod
    def add_args(parser):
        """Register the positional argument naming the layer's root folder."""
        parser.add_argument('path', type=str, help="Root folder of layer")

    def run(self):
        """Print one line "<checksum> <relative path>" per hashed folder."""
        # Trailing slashes are dropped before the path is handed to hashdir.
        root = self.args.path.rstrip('/')
        for relpath, digest in hashdir(root):
            print(digest, relpath)
33 changes: 33 additions & 0 deletions perfact/zodbsync/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import ast
import operator
import importlib
import hashlib
import os


class Namespace(object):
Expand Down Expand Up @@ -233,3 +235,34 @@ def increment_txnid(s):
arr[pos] += 1
break
return bytes(arr)


def hashdir(root):
    """
    Compute one checksum for each folder at or below root that directly
    contains files.

    This is used when changing the contents of a layer to recognize which
    objects are to be played back.
    For each folder that contains files, it creates a sha512sum over:
    - The sorted list of file names, each terminated by a newline
    - One additional newline as separator
    - The concatenation of the file contents, in the same order
    Only the bare file names enter the checksum, not the path leading to
    them, so the result does not depend on where the layer is stored.

    This is a generator that yields tuples (relative path, hex digest). The
    relative path is the folder path with the leading root cut off, i.e. ''
    for root itself. A folder is yielded before its subfolders, which are
    visited in sorted order. Folders without files yield nothing themselves
    but are still descended into.

    root should be passed without a trailing slash, otherwise the relative
    paths lose their leading separator.
    """
    def process(path):
        # Close the directory handle right away instead of leaving that to
        # the garbage collector while recursing.
        with os.scandir(path) as it:
            # Sorting by name gives the same order as sorting by full path
            # because all entries share the same parent folder.
            entries = sorted(it, key=lambda entry: entry.name)
        files = [entry for entry in entries if entry.is_file()]
        dirs = [entry.path for entry in entries if entry.is_dir()]
        if files:
            h = hashlib.sha512()
            for entry in files:
                # Hash the name only: the full path would make the checksum
                # depend on the location of root. surrogateescape keeps
                # names that are not valid UTF-8 from raising here.
                name = entry.name.encode('utf-8', 'surrogateescape')
                h.update(name + b'\n')
            h.update(b'\n')
            for entry in files:
                with open(entry.path, 'rb') as f:
                    # Read in 1 MiB chunks to bound memory for large files.
                    while data := f.read(1024*1024):
                        h.update(data)
            yield (path[len(root):], h.hexdigest())

        for d in dirs:
            yield from process(d)

    yield from process(root)
16 changes: 9 additions & 7 deletions perfact/zodbsync/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,15 @@
from .commands.reformat import Reformat
from .commands.checkout import Checkout
from .commands.freeze import Freeze
from .commands.layer_hash import LayerHash


class Runner(object):
"""
Parses arguments to select the correct SubCommand subclass.
"""
commands = [Record, Playback, Watch, Pick, Upload, WithLock, Reset, Exec,
Reformat, Checkout, Freeze]
Reformat, Checkout, Freeze, LayerHash]

def __init__(self):
"""
Expand Down Expand Up @@ -100,18 +101,19 @@ def parse(self, *argv):
logger.propagate = False

self.logger = logger
config = load_config(args.config)
if self.config is not None and config != self.config:
self.logger.warning("Reusing runner with different config")
self.sync = None
self.config = config
if getattr(args.command, 'use_config', True):
config = load_config(args.config)
if self.config is not None and config != self.config:
self.logger.warning("Reusing runner with different config")
self.sync = None
self.config = config

# Usually, each command needs a connection to the ZODB, but it might
# explicitly disable it.
if self.sync is None and getattr(args.command, 'connect', True):
self.sync = ZODBSync(config=self.config, logger=logger)

if not args.no_lock:
if self.config and not args.no_lock:
self.lock = filelock.FileLock(
os.path.join(self.config['base_dir'], '.zodbsync.lock')
)
Expand Down

0 comments on commit dc6253d

Please sign in to comment.