/
_naming.py
94 lines (76 loc) · 3.14 KB
/
_naming.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""Utilities for naming things."""
import hashlib
import json
import math
import uuid
from typing import Any, Mapping, Optional
# Default characters that IDs are spelled with: the digits 1-9 followed by the
# lowercase ASCII letters, 34 characters in total. "0" is left out to avoid
# confusion with the letter "o". The letter "l" is absent from the literal as
# well (presumably to avoid confusion with "1" -- TODO confirm).
DEFAULT_HASH_ID_ALPHABET = "123456789abcdefghijkmnopqrstuvwxyz"
def hash_id(
    params: Mapping[str, Any],
    *,
    alphabet: str = DEFAULT_HASH_ID_ALPHABET,
    length: Optional[int] = None,
) -> str:
    """
    Derive a short, deterministic, hash-like ID from JSON-serializable data.

    The data is dumped to JSON with sorted keys, ASCII-only output and no
    indentation, hashed with SHA-256, and the digest is then re-encoded
    using the characters of ``alphabet``.

    Args:
        params: A JSON-serializable object (dictionaries, lists, etc.)
            to compute an ID for. NaN and infinite floats are rejected
            because the JSON dump is done with ``allow_nan=False``.
        alphabet: The characters that IDs are made of. The default,
            ``DEFAULT_HASH_ID_ALPHABET``, consists of the digits ``1`` to
            ``9`` and the lowercase ASCII letters without ``l``. The digit
            ``0`` is left out to avoid confusion with the letter ``o``.
        length: The chosen length of the hash ID. When omitted, a length
            is derived from the size of the alphabet such that the ID
            should be approximately as random as a UUID.

    Returns:
        The hash ID as a string. The encoding step does not emit leading
        ``alphabet[0]`` digits, so the ID can occasionally be shorter
        than ``length``.
    """
    id_length = length
    if id_length is None:
        # Smallest length for which len(alphabet) ** length covers 2**122
        # values, the figure used here for "as unique as a UUID".
        # Caveat: the ID comes from a hash rather than a random number
        # generator, so the hashing algorithm could have a higher rate of
        # collisions than a UUID.
        uuid_equivalent_values = 2**122
        id_length = math.ceil(math.log(uuid_equivalent_values, len(alphabet)))

    # Sorted keys make the dump independent of dictionary insertion order.
    canonical_json = json.dumps(
        params,
        sort_keys=True,
        indent=None,
        ensure_ascii=True,
        allow_nan=False,
    )

    hasher = hashlib.sha256()
    hasher.update(canonical_json.encode("utf-8"))

    return decode_bytes_to_alphabet(
        hasher.digest(), alphabet=alphabet, length=id_length
    )
def decode_bytes_to_alphabet(
    digest: bytes, *, alphabet: str = DEFAULT_HASH_ID_ALPHABET, length: int
) -> str:
    """
    Re-encode an arbitrary byte string as text drawn from an alphabet.

    The bytes are read as a big-endian unsigned integer, reduced modulo
    ``len(alphabet) ** length``, and written out in base ``len(alphabet)``
    using ``alphabet[i]`` as the digit with value ``i``.

    Args:
        digest: The bytes to encode, for example a hash digest or the
            bytes of a UUID.
        alphabet: The characters that the result is made of. The default,
            ``DEFAULT_HASH_ID_ALPHABET``, consists of the digits ``1`` to
            ``9`` and the lowercase ASCII letters without ``l``. The digit
            ``0`` is left out to avoid confusion with the letter ``o``.
        length: The maximum number of characters in the result. This
            keyword is required; there is no default.

    Returns:
        The encoded string. Leading digits equal to ``alphabet[0]`` are
        not emitted, so the result has at most ``length`` characters and
        can be shorter; it is empty when the reduced value is zero.

    Raises:
        ValueError: If ``alphabet`` is empty or ``length`` is negative.
    """
    # Without these checks an empty alphabet fails with an opaque
    # ZeroDivisionError, and a negative length turns the modulus into a
    # float and fails later with an unrelated TypeError/OverflowError.
    if not alphabet:
        raise ValueError("alphabet must contain at least one character")
    if length < 0:
        raise ValueError(f"length must be non-negative, got {length!r}")

    base = len(alphabet)
    remainder = int.from_bytes(digest, byteorder="big") % base**length

    # divmod yields the least-significant digit first; collect the digits
    # and reverse once instead of repeatedly prepending to a string.
    digits = []
    while remainder:
        remainder, idx = divmod(remainder, base)
        digits.append(alphabet[idx])
    return "".join(reversed(digits))
def temporary_filename(base_filename: str) -> str:
    """
    Build a temporary filename by adding a random suffix to ``base_filename``.

    This only computes a name; no file is created. It differs from making a
    temporary filename under /tmp or some other scratch location: the goal
    is a temporary file that lives on the same filesystem as wherever
    ``base_filename`` is.

    Args:
        base_filename: The filename the temporary name is derived from.

    Returns:
        ``base_filename``, followed by ``--`` and a suffix of at most 10
        characters encoded from the bytes of a fresh ``uuid.uuid4()``.
    """
    random_bytes = uuid.uuid4().bytes
    suffix = decode_bytes_to_alphabet(random_bytes, length=10)
    return base_filename + "--" + suffix