Skip to content

Commit

Permalink
Fixing gsutil upload/download. Divide file list into chunks.
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexandr Bogomyakov authored and arodrime committed Jul 31, 2020
1 parent 6802d29 commit 9d945bf
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 5 deletions.
9 changes: 7 additions & 2 deletions medusa/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
from medusa.index import add_backup_start_to_index, add_backup_finish_to_index, set_latest_backup_in_index
from medusa.monitoring import Monitoring
from medusa.storage.s3_storage import is_aws_s3
from medusa.storage import Storage, format_bytes_str, ManifestObject
from medusa.storage.google_storage import GSUTIL_MAX_FILES_PER_CHUNK
from medusa.storage import Storage, format_bytes_str, ManifestObject, divide_chunks


class NodeBackupCache(object):
Expand Down Expand Up @@ -322,7 +323,11 @@ def backup_snapshots(storage, manifest, node_backup, node_backup_cache, snapshot

manifest_objects = list()
if len(needs_backup) > 0:
manifest_objects = storage.storage_driver.upload_blobs(needs_backup, dst_path)
# If there are many files to upload, split them into batches,
# because 'gsutil cp' cannot handle too many source files
# supplied via STDIN.
for src_batch in divide_chunks(needs_backup, GSUTIL_MAX_FILES_PER_CHUNK):
manifest_objects += storage.storage_driver.upload_blobs(src_batch, dst_path)

# Reintroducing already backed up objects in the manifest in differential
for obj in already_backed_up:
Expand Down
8 changes: 5 additions & 3 deletions medusa/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
import pathlib
import sys

from medusa.storage import Storage
from medusa.storage import Storage, divide_chunks
from medusa.storage.google_storage import GSUTIL_MAX_FILES_PER_CHUNK


def download_data(storageconfig, backup, fqtns_to_restore, destination):
Expand All @@ -43,8 +44,9 @@ def download_data(storageconfig, backup, fqtns_to_restore, destination):
subfolder.mkdir(parents=False)

if len(srcs) > 0 and (len(fqtns_to_restore) == 0 or fqtn in fqtns_to_restore):
logging.info('Downloading backup data')
storage.storage_driver.download_blobs(srcs, dst)
logging.debug('Downloading %s files to %s', len(srcs), dst)
for src_batch in divide_chunks(srcs, GSUTIL_MAX_FILES_PER_CHUNK):
storage.storage_driver.download_blobs(src_batch, dst)
elif len(srcs) == 0 and (len(fqtns_to_restore) == 0 or fqtn in fqtns_to_restore):
logging.debug('There is nothing to download for {}'.format(fqtn))
else:
Expand Down
5 changes: 5 additions & 0 deletions medusa/storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@
INDEX_BLOB_WITH_TIMESTAMP_PATTERN = re.compile('.*(started|finished)_(.*)_([0-9]+).timestamp$')


def divide_chunks(values, step):
    """Yield successive slices of *values*, each at most *step* items long.

    The final chunk holds whatever remains when ``len(values)`` is not an
    exact multiple of *step*. Slicing preserves the sequence type (a list
    yields lists, a string yields strings).
    """
    offsets = range(0, len(values), step)
    for offset in offsets:
        chunk = values[offset:offset + step]
        yield chunk


def format_bytes_str(value):
for unit_shift, unit in enumerate(['B', 'KB', 'MB', 'GB', 'TB']):
if value >> (unit_shift * 10) < 1024:
Expand Down
3 changes: 3 additions & 0 deletions medusa/storage/google_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
from medusa.storage.google_cloud_storage.gsutil import GSUtil


GSUTIL_MAX_FILES_PER_CHUNK = 64


class GoogleStorage(AbstractStorage):

def connect_storage(self):
Expand Down

0 comments on commit 9d945bf

Please sign in to comment.