Move the hashed-bin helper functions from repository_tool to repository_lib.

tuf/repository_lib.py gains the HASH_FUNCTION constant and the public helpers
create_bin_name(), get_bin_numbers() and find_bin_for_target_hash();
get_target_hash() now hashes the UTF-8 encoded path with HASH_FUNCTION.
tuf/repository_tool.py drops HASH_FUNCTION and its private copies
(_get_hash, _create_bin_name, _get_bin_numbers, _find_bin_for_hash) and calls
the repo_lib versions instead.

Review notes:
  * The hunk that inserted an empty line above '#!/usr/bin/env python' in
    tuf/repository_tool.py (mode 100755) has been dropped, because a shebang
    is only honoured on the first line of a file.  The new-file start of every
    remaining hunk in that file is therefore one lower than before, and the
    post-image blob id on its 'index' line is carried over unchanged (stale).
  * This copy was rebuilt from a whitespace-collapsed paste.  Indentation,
    blank context lines and the docstring section markers (Purpose /
    Arguments / Returns) are reconstructed and assumed to match the tree;
    verify with 'git apply --check' before applying.

diff --git a/tuf/repository_lib.py b/tuf/repository_lib.py
index 5eb88a01f0..738ab62340 100644
--- a/tuf/repository_lib.py
+++ b/tuf/repository_lib.py
@@ -91,6 +91,10 @@
 # Supported key types.
 SUPPORTED_KEY_TYPES = ['rsa', 'ed25519', 'ecdsa-sha2-nistp256']
 
+# The algorithm used by the repository to generate the path hash prefixes
+# of hashed bin delegations. Please see delegate_hashed_bins()
+HASH_FUNCTION = tuf.settings.DEFAULT_HASH_ALGORITHM
+
 
 def _generate_and_write_metadata(rolename, metadata_filename,
   targets_directory, metadata_directory, storage_backend,
@@ -1023,14 +1027,120 @@ def get_metadata_versioninfo(rolename, repository_name):
 
 
 
-# TODO: Is this function needed? It does not seem used, also the same
-# function exists as private method in updater.Updater._get_target_hash.
+def create_bin_name(low, high, prefix_len):
+  """
+  <Purpose>
+    Create a string name of a delegated hash bin, where name will be a range of
+    zero-padded (up to prefix_len) strings i.e. for low=00, high=07,
+    prefix_len=3 the returned name would be '000-007'.
+
+  <Arguments>
+    low:
+      The low end of the prefix range to be binned
+
+    high:
+      The high end of the prefix range to be binned
+
+    prefix_len:
+      The length of the prefix range components
+
+  <Returns>
+    A string bin name, with each end of the range zero-padded up to prefix_len
+  """
+  if low == high:
+    return "{low:0{len}x}".format(low=low, len=prefix_len)
+
+  return "{low:0{len}x}-{high:0{len}x}".format(low=low, high=high,
+      len=prefix_len)
+
+
+
+
+
+def get_bin_numbers(number_of_bins):
+  """
+  <Purpose>
+    Given the desired number of bins (number_of_bins) calculate the prefix
+    length (prefix_length), total number of prefixes (prefix_count) and the
+    number of prefixes to be stored in each bin (bin_size).
+    Example: number_of_bins = 32
+      prefix_length = 2
+      prefix_count = 256
+      bin_size = 8
+    That is, each of the 32 hashed bins are responsible for 8 hash prefixes,
+    i.e. 00-07, 08-0f, ..., f8-ff.
+
+  <Arguments>
+    number_of_bins:
+      The number of hashed bins in use
+
+  <Returns>
+    A tuple of three values:
+      1. prefix_length: the length of each prefix
+      2. prefix_count: the total number of prefixes in use
+      3. bin_size: the number of hash prefixes to be stored in each bin
+  """
+  # Convert 'number_of_bins' to hexadecimal and determine the number of
+  # hexadecimal digits needed by each hash prefix
+  prefix_length = len("{:x}".format(number_of_bins - 1))
+  # Calculate the total number of hash prefixes (e.g., 000 - FFF total values)
+  prefix_count = 16 ** prefix_length
+  # Determine how many prefixes to assign to each bin
+  bin_size = prefix_count // number_of_bins
+
+  # For simplicity, ensure that 'prefix_count' (16 ^ n) can be evenly
+  # distributed over 'number_of_bins' (must be 2 ^ n). Each bin will contain
+  # (prefix_count / number_of_bins) hash prefixes.
+  if prefix_count % number_of_bins != 0:
+    # Note: x % y != 0 does not guarantee that y is not a power of 2 for
+    # arbitrary x and y values. However, due to the relationship between
+    # number_of_bins and prefix_count, it is true for them.
+    raise securesystemslib.exceptions.Error('The "number_of_bins" argument'
+        ' must be a power of 2.')
+
+  return prefix_length, prefix_count, bin_size
+
+
+
+
+
+def find_bin_for_target_hash(target_hash, number_of_bins):
+  """
+  <Purpose>
+    For a given hashed filename, target_hash, calculate the name of a hashed bin
+    into which this file would be delegated given number_of_bins bins are in
+    use.
+
+  <Arguments>
+    target_hash:
+      The hash of the target file's path
+
+    number_of_bins:
+      The number of hashed_bins in use
+
+  <Returns>
+    The name of the hashed bin target_hash would be binned into
+  """
+
+  prefix_length, _, bin_size = get_bin_numbers(number_of_bins)
+
+  prefix = int(target_hash[:prefix_length], 16)
+
+  low = prefix - (prefix % bin_size)
+  high = (low + bin_size - 1)
+
+  return create_bin_name(low, high, prefix_length)
+
+
+
+
+
 def get_target_hash(target_filepath):
   """
   <Purpose>
     Compute the hash of 'target_filepath'. This is useful in conjunction with
     the "path_hash_prefixes" attribute in a delegated targets role, which
-    tells us which paths it is implicitly responsible for.
+    tells us which paths a role is implicitly responsible for.
 
     The repository may optionally organize targets into hashed bins to ease
     target delegations and role metadata management. The use of consistent
@@ -1053,17 +1163,9 @@ def get_target_hash(target_filepath):
   """
   tuf.formats.RELPATH_SCHEMA.check_match(target_filepath)
 
-  # Calculate the hash of the filepath to determine which bin to find the
-  # target. The client currently assumes the repository uses
-  # 'tuf.settings.DEFAULT_HASH_ALGORITHM' to generate hashes and 'utf-8'.
-  digest_object = securesystemslib.hash.digest(
-      tuf.settings.DEFAULT_HASH_ALGORITHM)
-  encoded_target_filepath = target_filepath.encode('utf-8')
-  digest_object.update(encoded_target_filepath)
-  target_filepath_hash = digest_object.hexdigest()
-
-  return target_filepath_hash
-
+  digest_object = securesystemslib.hash.digest(algorithm=HASH_FUNCTION)
+  digest_object.update(target_filepath.encode('utf-8'))
+  return digest_object.hexdigest()
 
 
 
diff --git a/tuf/repository_tool.py b/tuf/repository_tool.py
index be01ad3888..55e960153d 100755
--- a/tuf/repository_tool.py
+++ b/tuf/repository_tool.py
@@ -103,10 +103,6 @@
 # through 2031 and beyond.
 DEFAULT_RSA_KEY_BITS=3072
 
-# The algorithm used by the repository to generate the path hash prefixes
-# of hashed bin delegations. Please see delegate_hashed_bins()
-HASH_FUNCTION = tuf.settings.DEFAULT_HASH_ALGORITHM
-
 # The default number of hashed bin delegations
 DEFAULT_NUM_BINS=1024
 
@@ -2535,7 +2531,7 @@ def delegate_hashed_bins(self, list_of_targets, keys_of_hashed_bins,
     securesystemslib.formats.ANYKEYLIST_SCHEMA.check_match(keys_of_hashed_bins)
     tuf.formats.NUMBINS_SCHEMA.check_match(number_of_bins)
 
-    prefix_length, prefix_count, bin_size = _get_bin_numbers(number_of_bins)
+    prefix_length, prefix_count, bin_size = repo_lib.get_bin_numbers(number_of_bins)
 
     logger.info('Creating hashed bin delegations.\n' +
         repr(len(list_of_targets)) + ' total targets.\n' +
@@ -2549,7 +2545,7 @@ def delegate_hashed_bins(self, list_of_targets, keys_of_hashed_bins,
     ordered_roles = []
     for idx in range(0, prefix_count, bin_size):
       high = idx + bin_size - 1
-      name = _create_bin_name(idx, high, prefix_length)
+      name = repo_lib.create_bin_name(idx, high, prefix_length)
       if bin_size == 1:
         target_hash_prefixes = [name]
       else:
@@ -2573,7 +2569,7 @@ def delegate_hashed_bins(self, list_of_targets, keys_of_hashed_bins,
       # Determine the hash prefix of 'target_path' by computing the digest of
       # its path relative to the targets directory.
       # We must hash a target path as it appears in the metadata
-      hash_prefix = _get_hash(target_path)[:prefix_length]
+      hash_prefix = repo_lib.get_target_hash(target_path)[:prefix_length]
       ordered_roles[int(hash_prefix, 16) // bin_size]["target_paths"].append(target_path)
 
     keyids, keydict = _keys_to_keydict(keys_of_hashed_bins)
@@ -2680,8 +2676,8 @@ def add_target_to_bin(self, target_filepath, number_of_bins=DEFAULT_NUM_BINS,
 
     # TODO: check target_filepath is sane
 
-    path_hash = _get_hash(target_filepath)
-    bin_name = _find_bin_for_hash(path_hash, number_of_bins)
+    path_hash = repo_lib.get_target_hash(target_filepath)
+    bin_name = repo_lib.find_bin_for_target_hash(path_hash, number_of_bins)
 
     # Ensure the Targets object has delegated to hashed bins
     if not self._delegated_roles.get(bin_name, None):
@@ -2742,8 +2738,8 @@ def remove_target_from_bin(self, target_filepath,
 
     # TODO: check target_filepath is sane?
 
-    path_hash = _get_hash(target_filepath)
-    bin_name = _find_bin_for_hash(path_hash, number_of_bins)
+    path_hash = repo_lib.get_target_hash(target_filepath)
+    bin_name = repo_lib.find_bin_for_target_hash(path_hash, number_of_bins)
 
     # Ensure the Targets object has delegated to hashed bins
     if not self._delegated_roles.get(bin_name, None):
@@ -2845,112 +2841,6 @@ def _keys_to_keydict(keys):
 
 
 
-
-def _get_hash(target_filepath):
-  """
-  <Purpose>
-    Generate a hash of target_filepath, a path to a file (not the file
-    itself), using HASH_FUNCTION
-
-  <Arguments>
-    target_filepath:
-      A path to a targetfile, relative to the targets directory
-
-  <Returns>
-    The hexdigest hash of the filepath.
-  """
-
-  # TODO: ensure target_filepath is relative to targets_directory?
-  digest_object = securesystemslib.hash.digest(algorithm=HASH_FUNCTION)
-  digest_object.update(target_filepath.encode('utf-8'))
-  return digest_object.hexdigest()
-
-
-
-
-def _create_bin_name(low, high, prefix_len):
-  """
-  <Purpose>
-    Create a string name of a delegated hash bin, where name will be a range of
-    zero-padded (up to prefix_len) strings i.e. for low=00, high=07,
-    prefix_len=3 the returned name would be '000-007'.
-  """
-  if low == high:
-    return "{low:0{len}x}".format(low=low, len=prefix_len)
-
-  return "{low:0{len}x}-{high:0{len}x}".format(low=low, high=high,
-      len=prefix_len)
-
-
-
-
-
-def _get_bin_numbers(number_of_bins):
-  """
-  Given the desired number of bins (number_of_bins) calculate the prefix length
-  (prefix_length), total number of prefixes (prefix_count) and the number of
-  prefixes to be stored in each bin (bin_size).
-  Example: number_of_bins = 32
-    prefix_length = 2
-    prefix_count = 256
-    bin_size = 8
-  That is, each of the 32 hashed bins are responsible for 8 hash prefixes, i.e.
-  00-07, 08-0f, ..., f8-ff.
-  """
-  # Convert 'number_of_bins' to hexadecimal and determine the number of
-  # hexadecimal digits needed by each hash prefix
-  prefix_length = len("{:x}".format(number_of_bins - 1))
-  # Calculate the total number of hash prefixes (e.g., 000 - FFF total values)
-  prefix_count = 16 ** prefix_length
-  # Determine how many prefixes to assign to each bin
-  bin_size = prefix_count // number_of_bins
-
-  # For simplicity, ensure that 'prefix_count' (16 ^ n) can be evenly
-  # distributed over 'number_of_bins' (must be 2 ^ n). Each bin will contain
-  # (prefix_count / number_of_bins) hash prefixes.
-  if prefix_count % number_of_bins != 0:
-    # Note: x % y != 0 does not guarantee that y is not a power of 2 for
-    # arbitrary x and y values. However, due to the relationship between
-    # number_of_bins and prefix_count, it is true for them.
-    raise securesystemslib.exceptions.Error('The "number_of_bins" argument'
-        ' must be a power of 2.')
-
-  return prefix_length, prefix_count, bin_size
-
-
-
-
-def _find_bin_for_hash(path_hash, number_of_bins):
-  """
-  <Purpose>
-    For a given hashed filename, path_hash, calculate the name of a hashed bin
-    into which this file would be delegated given number_of_bins bins are in
-    use.
-
-  <Arguments>
-    path_hash:
-      The hash of the target file's path
-
-    number_of_bins:
-      The number of hashed_bins in use
-
-  <Returns>
-    The name of the hashed bin path_hash would be binned into.
-  """
-
-  prefix_length, _, bin_size = _get_bin_numbers(number_of_bins)
-
-  prefix = int(path_hash[:prefix_length], 16)
-
-  low = prefix - (prefix % bin_size)
-  high = (low + bin_size - 1)
-
-  return _create_bin_name(low, high, prefix_length)
-
-
-
-
-
 def create_new_repository(repository_directory, repository_name='default',
     storage_backend=None):
   """