Export of calibre data: Ensure individual part files in the exported data are no larger than one gigabyte even if the library contains individual files larger than that size.

Note that this means that exports created by calibre from this version
on will not be importable by earlier versions. However, exports from
earlier versions should still be importable.
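To make the mechanics concrete, here is a minimal sketch of the splitting idea the commit message describes. It is illustrative only (the function name and chunk size are invented here, not calibre's actual exim implementation): a source stream is copied into numbered part files, rolling over to a new part whenever the current one reaches the size cap, so even a single file larger than the cap never produces an oversized part.

    def write_in_parts(src, base_path, part_size=1 << 30):
        # Copy the stream src into base_path.0001, base_path.0002, ...
        # never letting any single part exceed part_size bytes.
        part_num = 0
        while True:
            part_num += 1
            written = 0
            with open(f'{base_path}.{part_num:04d}', 'wb') as part:
                while written < part_size:
                    chunk = src.read(min(65536, part_size - written))
                    if not chunk:
                        return part_num  # source exhausted
                    part.write(chunk)
                    written += len(chunk)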
kovidgoyal committed Apr 21, 2024
1 parent bcc8ea4 commit 1df7047
Showing 3 changed files with 239 additions and 92 deletions.
29 changes: 15 additions & 14 deletions src/calibre/db/cache.py
@@ -15,7 +15,6 @@
 import weakref
 from collections import defaultdict
 from collections.abc import MutableSet, Set
-from contextlib import closing
 from functools import partial, wraps
 from io import DEFAULT_BUFFER_SIZE, BytesIO
 from queue import Queue
@@ -3162,10 +3161,10 @@ def report_progress(fname):
                 if mtime is not None:
                     mtime = timestampfromdt(mtime)
                 with exporter.start_file(key, mtime=mtime) as dest:
-                    self._copy_format_to(book_id, fmt, dest, report_file_size=dest.ensure_space)
+                    self._copy_format_to(book_id, fmt, dest)
             cover_key = '{}:{}:{}'.format(key_prefix, book_id, '.cover')
             with exporter.start_file(cover_key) as dest:
-                if not self.copy_cover_to(book_id, dest, report_file_size=dest.ensure_space):
+                if not self.copy_cover_to(book_id, dest):
                     dest.discard()
                 else:
                     fm['.cover'] = cover_key
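A hedged reading of the hunk above: before this commit the caller announced each file's size via dest.ensure_space so the exporter could start a fresh part before writing, which could not help when a single file exceeded the part size; the destination returned by start_file now evidently splits writes across parts by itself, so the report_file_size hook is dropped. A rough sketch of such a self-splitting destination (the current_part/new_part names are invented for illustration, not calibre's actual FileDest):

    class SplittingDest:
        # Illustrative file-like object that rolls over to a new part
        # once the current part has no room left.
        def __init__(self, exporter):
            self.exporter = exporter

        def write(self, data):
            data = memoryview(data)
            while data:
                part, remaining = self.exporter.current_part()  # hypothetical API
                if remaining <= 0:
                    self.exporter.new_part()  # hypothetical API
                    continue
                n = min(len(data), remaining)
                part.write(data[:n])
                data = data[n:]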
@@ -3442,6 +3441,7 @@ def is_null_date(x):
             dest_value.extend(src_value)
             self._set_field(field, {dest_id: dest_value})
 
+
 def import_library(library_key, importer, library_path, progress=None, abort=None):
     from calibre.db.backend import DB
     metadata = importer.metadata[library_key]
@@ -3455,25 +3455,22 @@ def report_progress(fname):
     report_progress('metadata.db')
     if abort is not None and abort.is_set():
         return
-    with open(os.path.join(library_path, 'metadata.db'), 'wb') as f:
-        with closing(importer.start_file(metadata['metadata.db'], 'metadata.db for ' + library_path)) as src:
-            shutil.copyfileobj(src, f)
+    importer.save_file(metadata['metadata.db'], 'metadata.db for ' + library_path, os.path.join(library_path, 'metadata.db'))
     if 'full-text-search.db' in metadata:
         if progress is not None:
             progress('full-text-search.db', 1, total)
         if abort is not None and abort.is_set():
             return
         poff += 1
-        with open(os.path.join(library_path, 'full-text-search.db'), 'wb') as f:
-            with closing(importer.start_file(metadata['full-text-search.db'], 'full-text-search.db for ' + library_path)) as src:
-                shutil.copyfileobj(src, f)
+        importer.save_file(metadata['full-text-search.db'], 'full-text-search.db for ' + library_path,
+                           os.path.join(library_path, 'full-text-search.db'))
     if abort is not None and abort.is_set():
         return
     if 'notes.db' in metadata:
         import zipfile
         notes_dir = os.path.join(library_path, NOTES_DIR_NAME)
         os.makedirs(notes_dir, exist_ok=True)
-        with closing(importer.start_file(metadata['notes.db'], 'notes.db for ' + library_path)) as stream:
+        with importer.start_file(metadata['notes.db'], 'notes.db for ' + library_path) as stream:
             stream.check_hash = False
             with zipfile.ZipFile(stream) as zf:
                 for zi in zf.infolist():
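The stream.check_hash = False line above deserves a note. A plausible explanation (an inference, not stated in the diff) is that zipfile seeks around inside the stream, which defeats the kind of sequential running-checksum verification sketched below, so whole-file hash checking must be disabled for the notes.db archive:

    import hashlib

    class ChecksumReader:
        # Sketch: verify a digest while reading strictly sequentially.
        # Random access via seek() would make the running digest
        # meaningless, hence check_hash = False before ZipFile use.
        def __init__(self, raw, expected_hexdigest):
            self.raw = raw
            self.expected = expected_hexdigest
            self.digest = hashlib.sha1()

        def read(self, n=-1):
            data = self.raw.read(n)
            self.digest.update(data)
            return data

        def close(self):
            self.raw.close()
            if self.digest.hexdigest() != self.expected:
                raise ValueError('Checksum mismatch')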
@@ -3482,6 +3479,8 @@
                     os.utime(tpath, (date_time, date_time))
     if abort is not None and abort.is_set():
         return
+    if importer.corrupted_files:
+        raise ValueError('Corrupted files:\n' + '\n'.join(importer.corrupted_files))
     cache = Cache(DB(library_path, load_user_formatter_functions=False))
     cache.init()

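The repeated open/closing/copyfileobj dance for the two databases is now a single importer.save_file call. Its implementation lives in the exim module, whose diff did not load on this page; as an assumption about its behavior, it is roughly equivalent to the code it replaces, with hash mismatches recorded in importer.corrupted_files rather than raised immediately:

    import shutil

    def save_file(importer, key, display_name, output_path):
        # Assumed equivalent of the removed inline code: copy one
        # exported entry to output_path.
        with importer.start_file(key, display_name) as src:
            with open(output_path, 'wb') as dest:
                shutil.copyfileobj(src, dest)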
@@ -3494,20 +3493,22 @@ def report_progress(fname):
         if progress is not None:
             progress(title, i + poff, total)
         cache._update_path((book_id,), mark_as_dirtied=False)
-        for fmt, fmtkey in iteritems(fmt_key_map):
+        for fmt, fmtkey in fmt_key_map.items():
             if fmt == '.cover':
-                with closing(importer.start_file(fmtkey, _('Cover for %s') % title)) as stream:
+                with importer.start_file(fmtkey, _('Cover for %s') % title) as stream:
                     path = cache._field_for('path', book_id).replace('/', os.sep)
                     cache.backend.set_cover(book_id, path, stream, no_processing=True)
             else:
-                with closing(importer.start_file(fmtkey, _('{0} format for {1}').format(fmt.upper(), title))) as stream:
+                with importer.start_file(fmtkey, _('{0} format for {1}').format(fmt.upper(), title)) as stream:
                     size, fname = cache._do_add_format(book_id, fmt, stream, mtime=stream.mtime)
                     cache.fields['formats'].table.update_fmt(book_id, fmt, fname, size, cache.backend)
         for relpath, efkey in extra_files.get(book_id, {}).items():
-            with closing(importer.start_file(efkey, _('Extra file {0} for book {1}').format(relpath, title))) as stream:
+            with importer.start_file(efkey, _('Extra file {0} for book {1}').format(relpath, title)) as stream:
                 path = cache._field_for('path', book_id).replace('/', os.sep)
                 cache.backend.add_extra_file(relpath, stream, path)
         cache.dump_metadata({book_id})
+    if importer.corrupted_files:
+        raise ValueError('Corrupted files:\n' + '\n'.join(importer.corrupted_files))
     if progress is not None:
         progress(_('Completed'), total, total)
     return cache
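Since import_library now raises ValueError listing every corrupted entry instead of silently importing damaged data, callers get a single comprehensive failure. A minimal usage sketch, using only the APIs visible in this diff:

    from calibre.db.cache import import_library
    from calibre.utils.exim import Importer

    def import_with_report(library_key, exported_dir, library_path):
        # Surface all corrupted entries at once rather than failing
        # on the first damaged file.
        importer = Importer(exported_dir)
        try:
            return import_library(library_key, importer, library_path)
        except ValueError as err:
            print('Import failed:', err)
            return None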
21 changes: 19 additions & 2 deletions src/calibre/db/tests/filesystem.py
@@ -246,6 +246,21 @@ def test_fname_change(self):
     def test_export_import(self):
         from calibre.db.cache import import_library
         from calibre.utils.exim import Exporter, Importer
+        with TemporaryDirectory('export_lib') as tdir:
+            for part_size in (8, 1, 1024):
+                exporter = Exporter(tdir, part_size=part_size + Exporter.tail_size())
+                files = {
+                    'a': b'a' * 7, 'b': b'b' * 7, 'c': b'c' * 2, 'd': b'd' * 9, 'e': b'e' * 3,
+                }
+                for key, data in files.items():
+                    exporter.add_file(BytesIO(data), key)
+                exporter.commit()
+                importer = Importer(tdir)
+                for key, expected in files.items():
+                    with importer.start_file(key, key) as f:
+                        actual = f.read()
+                    self.assertEqual(expected, actual, key)
+                self.assertFalse(importer.corrupted_files)
         cache = self.init_cache()
         bookdir = os.path.dirname(cache.format_abspath(1, '__COVER_INTERNAL__'))
         with open(os.path.join(bookdir, 'exf'), 'w') as f:
@@ -255,13 +270,14 @@ def test_export_import(self):
             f.write('recurse')
         self.assertEqual({ef.relpath for ef in cache.list_extra_files(1, pattern='sub/**/*')}, {'sub/recurse'})
         self.assertEqual({ef.relpath for ef in cache.list_extra_files(1)}, {'exf', 'sub/recurse'})
-        for part_size in (1 << 30, 100, 1):
+        for part_size in (512, 1027, None):
             with TemporaryDirectory('export_lib') as tdir, TemporaryDirectory('import_lib') as idir:
-                exporter = Exporter(tdir, part_size=part_size)
+                exporter = Exporter(tdir, part_size=part_size if part_size is None else (part_size + Exporter.tail_size()))
                 cache.export_library('l', exporter)
                 exporter.commit()
                 importer = Importer(tdir)
                 ic = import_library('l', importer, idir)
+                self.assertFalse(importer.corrupted_files)
                 self.assertEqual(cache.all_book_ids(), ic.all_book_ids())
                 for book_id in cache.all_book_ids():
                     self.assertEqual(cache.cover(book_id), ic.cover(book_id), 'Covers not identical for book: %d' % book_id)
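A hedged reading of the test arithmetic above: part_size appears to denote the full on-disk size of a part including a fixed per-part trailer, so the tests add Exporter.tail_size() to leave exactly the intended number of payload bytes in each part. For example:

    from calibre.utils.exim import Exporter

    # Assumption drawn from the tests: to fit N payload bytes per part,
    # size parts as N + Exporter.tail_size(), since each part ends with
    # a fixed-size trailer. With 7-byte files and an 8-byte payload cap,
    # each file then spans at most two parts.
    payload_cap = 8
    part_size = payload_cap + Exporter.tail_size()
    exporter = Exporter('/tmp/export_dir', part_size=part_size)  # path is illustrative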
@@ -290,6 +306,7 @@ def test_export_import(self):
                 exporter.commit()
                 importer = Importer(tdir)
                 ic = import_library('l', importer, idir)
+                self.assertFalse(importer.corrupted_files)
                 self.assertEqual(ic.fts_search('exim')[0]['id'], 1)
                 self.assertEqual(cache.notes_for('authors', 2), ic.notes_for('authors', 2))
                 a, b = cache.get_notes_resource(r1), ic.get_notes_resource(r1)
