Skip to content

Commit

Permalink
Merge pull request quay#139 from kleesc/replace-resumablehashlib
Browse files Browse the repository at this point in the history
Replace resumablehashlib
  • Loading branch information
kleesc committed Jan 8, 2020
2 parents 8a115c1 + f9842be commit 9cb2de2
Show file tree
Hide file tree
Showing 10 changed files with 36 additions and 33 deletions.
4 changes: 2 additions & 2 deletions data/database.py
Expand Up @@ -22,7 +22,7 @@

from sqlalchemy.engine.url import make_url

import resumablehashlib
import rehash
from cachetools.func import lru_cache

from active_migration import ERTMigrationFlags, ActiveDataMigration
Expand Down Expand Up @@ -1356,7 +1356,7 @@ class BlobUpload(BaseModel):
repository = ForeignKeyField(Repository)
uuid = CharField(index=True, unique=True)
byte_count = BigIntegerField(default=0)
sha_state = ResumableSHA256Field(null=True, default=resumablehashlib.sha256)
sha_state = ResumableSHA256Field(null=True, default=rehash.sha256)
location = ForeignKeyField(ImageStorageLocation)
storage_metadata = JSONField(null=True, default={})
chunk_count = IntegerField(default=0)
Expand Down
37 changes: 20 additions & 17 deletions data/fields.py
@@ -1,11 +1,12 @@
import base64
import pickle
import string
import json

from random import SystemRandom

import bcrypt
import resumablehashlib
import rehash

from peewee import TextField, CharField, SmallIntegerField
from data.text import prefix_search
Expand All @@ -17,42 +18,44 @@ def random_string(length=16):


class _ResumableSHAField(TextField):
"""
Base Class used to store the state of an in-progress hash in the database. This is particularly
useful for working with large byte streams and allows the hashing to be paused and resumed
as needed.
"""

def _create_sha(self):
raise NotImplementedError

def db_value(self, value):
"""
Serialize the Hasher's state for storage in the database as plain-text.
"""
if value is None:
return None

sha_state = value.state()

# One of the fields is a byte string, let's base64 encode it to make sure
# we can store and fetch it regardless of the database's default collation.
sha_state[3] = base64.b64encode(sha_state[3])

return json.dumps(sha_state)
serialized_state = pickle.dumps(value)
return serialized_state

def python_value(self, value):
"""
Restore the Hasher from its state stored in the database.
"""
if value is None:
return None

sha_state = json.loads(value)

# We need to base64 decode the data bytestring.
sha_state[3] = base64.b64decode(sha_state[3])
to_resume = self._create_sha()
to_resume.set_state(sha_state)
return to_resume
hasher = pickle.loads(value)
return hasher


class ResumableSHA256Field(_ResumableSHAField):
def _create_sha(self):
return resumablehashlib.sha256()
return rehash.sha256()


class ResumableSHA1Field(_ResumableSHAField):
def _create_sha(self):
return resumablehashlib.sha1()
return rehash.sha1()


class JSONField(TextField):
Expand Down
2 changes: 1 addition & 1 deletion data/model/appspecifictoken.py
Expand Up @@ -8,7 +8,7 @@
from data.model._basequery import update_last_accessed
from data.fields import DecryptedValue
from util.timedeltastring import convert_to_timedelta
from util.str import remove_unicode
from util.unicode import remove_unicode

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion data/model/user.py
Expand Up @@ -65,7 +65,7 @@
)
from util.backoff import exponential_backoff
from util.timedeltastring import convert_to_timedelta
from util.str import remove_unicode
from util.unicode import remove_unicode
from util.security.token import decode_public_private_token, encode_public_private_token


Expand Down
4 changes: 2 additions & 2 deletions data/registry_model/blobuploader.py
Expand Up @@ -5,7 +5,7 @@
from collections import namedtuple

import bitmath
import resumablehashlib
import rehash

from prometheus_client import Counter, Histogram

Expand Down Expand Up @@ -195,7 +195,7 @@ def upload_chunk(self, app_config, input_fp, start_offset=0, length=-1):
# already calculated hash data for the previous chunk(s).
piece_hasher = None
if self.blob_upload.chunk_count == 0 or self.blob_upload.piece_sha_state:
initial_sha1_value = self.blob_upload.piece_sha_state or resumablehashlib.sha1()
initial_sha1_value = self.blob_upload.piece_sha_state or rehash.sha1()
initial_sha1_pieces_value = self.blob_upload.piece_hashes or ""

piece_hasher = PieceHasher(
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,7 +1,6 @@
-e git+https://github.com/app-registry/appr-server.git@c2ef3b88afe926a92ef5f2e11e7d4a259e286a17#egg=cnr_server
-e git+https://github.com/coreos/mockldap.git@59a46efbe8c7cd8146a87a7c4f2b09746b953e11#egg=mockldap
-e git+https://github.com/coreos/py-bitbucket.git@55a1ada645f2fb6369147996ec71edd7828d91c8#egg=py_bitbucket
#-e git+https://github.com/coreos/resumablehashlib.git@b1b631249589b07adf40e0ee545b323a501340b4#egg=resumablehashlib
-e git+https://github.com/DevTable/aniso8601-fake.git@bd7762c7dea0498706d3f57db60cd8a8af44ba90#egg=aniso8601
-e git+https://github.com/DevTable/anunidecode.git@d59236a822e578ba3a0e5e5abbd3855873fa7a88#egg=anunidecode
-e git+https://github.com/DevTable/boto.git@a6a5c00bd199b1492e99199251b10451970b5b08#egg=boto
Expand Down Expand Up @@ -147,6 +146,7 @@ raven==6.10.0
recaptcha2==0.1
redis==3.3.8
redlock==1.2.0
rehash==1.0.0
reportlab #==3.1.44
requests-aws4auth==0.9
requests-file==1.4.3
Expand Down
4 changes: 2 additions & 2 deletions test/registry/registry_tests.py
Expand Up @@ -6,7 +6,7 @@

import binascii
import bencode
import resumablehashlib
import rehash

from werkzeug.datastructures import Accept

Expand Down Expand Up @@ -1660,7 +1660,7 @@ def test_pull_torrent(
assert torrent_info.get("announce") is not None

# Check the pieces.
sha = resumablehashlib.sha1()
sha = rehash.sha1()
sha.update(image.bytes)

expected = binascii.hexlify(sha.digest())
Expand Down
6 changes: 3 additions & 3 deletions test/registry_tests.py
Expand Up @@ -20,7 +20,7 @@
import bencode
import gpgme
import requests
import resumablehashlib
import rehash

from Crypto import Random
from Crypto.PublicKey import RSA
Expand Down Expand Up @@ -2196,7 +2196,7 @@ def test_get_basic_torrent(self):
self.assertIsNotNone(contents.get("info", {}).get("pieces"))
self.assertIsNotNone(contents.get("announce"))

sha = resumablehashlib.sha1()
sha = rehash.sha1()
sha.update(blobs[blobsum])

expected = binascii.hexlify(sha.digest())
Expand Down Expand Up @@ -2370,7 +2370,7 @@ def test_squashed_torrent(self):
self.assertIsNotNone(contents.get("announce"))

# Ensure the SHA1 matches the generated tar.
sha = resumablehashlib.sha1()
sha = rehash.sha1()
sha.update(squashed)

expected = binascii.hexlify(sha.digest())
Expand Down
6 changes: 3 additions & 3 deletions util/registry/torrent.py
Expand Up @@ -5,7 +5,7 @@

import bencode
import jwt
import resumablehashlib
import rehash


class TorrentConfiguration(object):
Expand Down Expand Up @@ -111,7 +111,7 @@ def __init__(
self._piece_hashes = bytearray(starting_piece_hash_bytes)

if hash_fragment_to_resume is None:
self._hash_fragment = resumablehashlib.sha1()
self._hash_fragment = rehash.sha1()
else:
self._hash_fragment = hash_fragment_to_resume

Expand All @@ -124,7 +124,7 @@ def update(self, buf):
if self._piece_offset() == 0 and to_hash_len > 0 and self._current_offset > 0:
# We are opening a new piece
self._piece_hashes.extend(self._hash_fragment.digest())
self._hash_fragment = resumablehashlib.sha1()
self._hash_fragment = rehash.sha1()

self._hash_fragment.update(buf_bytes_to_hash)
self._current_offset += to_hash_len
Expand Down
2 changes: 1 addition & 1 deletion util/validation.py
Expand Up @@ -9,7 +9,7 @@
INVALID_PASSWORD_MESSAGE = (
"Invalid password, password must be at least " + "8 characters and contain no whitespace."
)
VALID_CHARACTERS = string.digits + string.lowercase
VALID_CHARACTERS = string.digits + string.ascii_lowercase

MIN_USERNAME_LENGTH = 2
MAX_USERNAME_LENGTH = 255
Expand Down

0 comments on commit 9cb2de2

Please sign in to comment.