From aa3d9c017fc4e8ad7e4972d7b4d53232641c5f6a Mon Sep 17 00:00:00 2001 From: Thomas Goyne Date: Fri, 15 Mar 2024 09:19:33 -0700 Subject: [PATCH] Clean up a bunch of old encryption cruft The global shared cache of encrypted file maps was originally required because we actually opened Realm files multiple times in normal usage, so each of the open files had to know about each other to copy things around. #4839 made it so that in normal usage we only ever have one DB instance per file per process, so it became dead code. Multiprocess encryption made it unnecessary even when the one-DB-per-process rule is violated, as the multiprocess code path covers that. This eliminates our last reliance on file UniqueIDs, so it lets us get rid of hacks related to that. The encryption page reclaimer mostly never actually worked. It used a very conservative page reclamation rule that meant that pages would never be reclaimed if there was a long-lived Transaction, even if it was frozen or kept refreshed. This is very common in practice, and when it doesn't happen the DB usually isn't kept open either, making it redundant. Encryption used to rely on handling BAD_EXEC signals (or mach exceptions) rather than explicit barriers, so it had to read and write in page-sized chunks. That's no longer the case, so we can eliminate a lot of complexity by always reading and writing in 4k blocks. 
--- src/realm/alloc.cpp | 36 +- src/realm/alloc.hpp | 55 +- src/realm/alloc_slab.cpp | 102 +- src/realm/alloc_slab.hpp | 18 +- src/realm/db.cpp | 34 +- src/realm/db.hpp | 7 +- src/realm/exec/realm_decrypt.cpp | 6 +- src/realm/group.cpp | 4 - src/realm/group_writer.cpp | 6 +- src/realm/group_writer.hpp | 3 + src/realm/node_header.hpp | 1 - .../object-store/impl/realm_coordinator.cpp | 2 +- .../object-store/impl/realm_coordinator.hpp | 2 +- src/realm/query_engine.hpp | 1 + src/realm/sync/noinst/client_impl_base.hpp | 1 + src/realm/sync/noinst/server/server.cpp | 1 - src/realm/sync/tools/print_changeset.cpp | 1 - src/realm/transaction.cpp | 3 - src/realm/util/aes_cryptor.hpp | 71 +- src/realm/util/encrypted_file_mapping.cpp | 1133 ++++++++--------- src/realm/util/encrypted_file_mapping.hpp | 213 ++-- src/realm/util/file.cpp | 442 +++---- src/realm/util/file.hpp | 270 +--- src/realm/util/file_mapper.cpp | 844 ++---------- src/realm/util/file_mapper.hpp | 152 +-- src/realm/util/load_file.cpp | 11 +- src/realm/util/load_file.hpp | 1 - src/realm/util/safe_int_ops.hpp | 64 +- src/realm/utilities.hpp | 16 +- test/fuzz_group.cpp | 1 + test/object-store/sync/client_reset.cpp | 8 +- test/object-store/sync/sync_manager.cpp | 4 +- .../object-store/util/sync/baas_admin_api.cpp | 4 +- test/realm-fuzzer/fuzz_configurator.cpp | 6 +- test/realm-fuzzer/fuzz_configurator.hpp | 5 +- test/test_all.cpp | 15 - test/test_alloc.cpp | 9 +- test/test_compaction.cpp | 1 + test/test_encrypted_file_mapping.cpp | 218 ++-- test/test_file.cpp | 326 +++-- test/test_group.cpp | 4 +- test/test_json.cpp | 2 +- test/test_lang_bind_helper.cpp | 1 + test/test_shared.cpp | 21 +- test/test_transactions.cpp | 178 --- test/test_transform.cpp | 43 +- test/test_upgrade_database.cpp | 4 +- test/test_util_logger.cpp | 4 +- test/util/spawned_process.cpp | 1 + test/util/test_path.hpp | 8 +- 50 files changed, 1401 insertions(+), 2962 deletions(-) diff --git a/src/realm/alloc.cpp b/src/realm/alloc.cpp index 
f15cddfdffd..7f9817e2fbe 100644 --- a/src/realm/alloc.cpp +++ b/src/realm/alloc.cpp @@ -119,9 +119,7 @@ char* Allocator::translate_less_critical(RefTranslation* ref_translation_ptr, re RefTranslation& txl = ref_translation_ptr[idx]; size_t offset = ref - get_section_base(idx); char* addr = txl.mapping_addr + offset; -#if REALM_ENABLE_ENCRYPTION - realm::util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping, nullptr); -#endif + util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping); auto size = NodeHeader::get_byte_size_from_header(addr); bool crosses_mapping = offset + size > (1 << section_shift); // Move the limit on use of the existing primary mapping. @@ -135,27 +133,21 @@ char* Allocator::translate_less_critical(RefTranslation* ref_translation_ptr, re } if (REALM_LIKELY(!crosses_mapping)) { // Array fits inside primary mapping, no new mapping needed. -#if REALM_ENABLE_ENCRYPTION - realm::util::encryption_read_barrier(addr, size, txl.encrypted_mapping, nullptr); -#endif + util::encryption_read_barrier(addr, size, txl.encrypted_mapping); return addr; } - else { - // we need a cross-over mapping. If one is already established, use that. - auto xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_acquire); - if (!xover_mapping_addr) { - // we need to establish a xover mapping - or wait for another thread to finish - // establishing one: - const_cast(this)->get_or_add_xover_mapping(txl, idx, offset, size); - // reload (can be relaxed since the call above synchronizes on a mutex) - xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_relaxed); - } - // array is now known to be inside the established xover mapping: - addr = xover_mapping_addr + (offset - txl.xover_mapping_base); -#if REALM_ENABLE_ENCRYPTION - realm::util::encryption_read_barrier(addr, size, txl.xover_encrypted_mapping, nullptr); -#endif - return addr; + // we need a cross-over mapping. 
If one is already established, use that. + auto xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_acquire); + if (!xover_mapping_addr) { + // we need to establish a xover mapping - or wait for another thread to finish + // establishing one: + const_cast(this)->get_or_add_xover_mapping(txl, idx, offset, size); + // reload (can be relaxed since the call above synchronizes on a mutex) + xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_relaxed); } + // array is now known to be inside the established xover mapping: + addr = xover_mapping_addr + (offset - txl.xover_mapping_base); + util::encryption_read_barrier(addr, size, txl.xover_encrypted_mapping); + return addr; } } // namespace realm diff --git a/src/realm/alloc.hpp b/src/realm/alloc.hpp index f3515e36826..2a18e93ab1d 100644 --- a/src/realm/alloc.hpp +++ b/src/realm/alloc.hpp @@ -171,7 +171,7 @@ class Allocator { // into equal chunks. struct RefTranslation { char* mapping_addr; - uint64_t cookie; + uint64_t cookie = 0x1234567890; std::atomic lowest_possible_xover_offset = 0; // member 'xover_mapping_addr' is used for memory synchronization of the fields @@ -183,14 +183,12 @@ class Allocator { #if REALM_ENABLE_ENCRYPTION util::EncryptedFileMapping* encrypted_mapping = nullptr; util::EncryptedFileMapping* xover_encrypted_mapping = nullptr; +#else + static inline util::EncryptedFileMapping* const encrypted_mapping = nullptr; + static inline util::EncryptedFileMapping* const xover_encrypted_mapping = nullptr; #endif - explicit RefTranslation(char* addr) + explicit RefTranslation(char* addr = nullptr) : mapping_addr(addr) - , cookie(0x1234567890) - { - } - RefTranslation() - : RefTranslation(nullptr) { } ~RefTranslation() @@ -222,7 +220,7 @@ class Allocator { }; // This pointer may be changed concurrently with access, so make sure it is // atomic! 
- std::atomic m_ref_translation_ptr; + std::atomic m_ref_translation_ptr{nullptr}; /// The specified size must be divisible by 8, and must not be /// zero. @@ -252,7 +250,7 @@ class Allocator { char* translate_critical(RefTranslation*, ref_type ref) const noexcept; char* translate_less_critical(RefTranslation*, ref_type ref) const noexcept; virtual void get_or_add_xover_mapping(RefTranslation&, size_t, size_t, size_t) = 0; - Allocator() noexcept; + Allocator() noexcept = default; size_t get_section_index(size_t pos) const noexcept; inline size_t get_section_base(size_t index) const noexcept; @@ -271,11 +269,9 @@ class Allocator { // used to detect if the allocator (and owning structure, e.g. Table) // is recycled. Mismatch on this counter will cause accesors // lower in the hierarchy to throw if access is attempted. - std::atomic m_content_versioning_counter; - - std::atomic m_storage_versioning_counter; - - std::atomic m_instance_versioning_counter; + std::atomic m_content_versioning_counter{0}; + std::atomic m_storage_versioning_counter{0}; + std::atomic m_instance_versioning_counter{0}; inline uint_fast64_t get_storage_version(uint64_t instance_version) { @@ -547,14 +543,6 @@ inline bool Allocator::is_read_only(ref_type ref) const noexcept return ref < m_baseline.load(std::memory_order_relaxed); } -inline Allocator::Allocator() noexcept -{ - m_content_versioning_counter = 0; - m_storage_versioning_counter = 0; - m_instance_versioning_counter = 0; - m_ref_translation_ptr = nullptr; -} - // performance critical part of the translation process. Less critical code is in translate_less_critical. 
inline char* Allocator::translate_critical(RefTranslation* ref_translation_ptr, ref_type ref) const noexcept { @@ -566,30 +554,23 @@ inline char* Allocator::translate_critical(RefTranslation* ref_translation_ptr, if (REALM_LIKELY(offset < lowest_possible_xover_offset)) { // the lowest possible xover offset may grow concurrently, but that will not affect this code path char* addr = txl.mapping_addr + offset; -#if REALM_ENABLE_ENCRYPTION - realm::util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping, - NodeHeader::get_byte_size_from_header); -#endif + util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping); + size_t size = NodeHeader::get_byte_size_from_header(addr); + util::encryption_read_barrier(addr, size, txl.encrypted_mapping); return addr; } - else { - // the lowest possible xover offset may grow concurrently, but that will be handled inside the call - return translate_less_critical(ref_translation_ptr, ref); - } + // the lowest possible xover offset may grow concurrently, but that will be handled inside the call + return translate_less_critical(ref_translation_ptr, ref); } realm::util::terminate("Invalid ref translation entry", __FILE__, __LINE__, txl.cookie, 0x1234567890, ref, idx); - return nullptr; } inline char* Allocator::translate(ref_type ref) const noexcept { - auto ref_translation_ptr = m_ref_translation_ptr.load(std::memory_order_acquire); - if (REALM_LIKELY(ref_translation_ptr)) { - return translate_critical(ref_translation_ptr, ref); - } - else { - return do_translate(ref); + if (auto ptr = m_ref_translation_ptr.load(std::memory_order_acquire); REALM_LIKELY(ptr)) { + return translate_critical(ptr, ref); } + return do_translate(ref); } diff --git a/src/realm/alloc_slab.cpp b/src/realm/alloc_slab.cpp index af175d1965b..d65afec42ff 100644 --- a/src/realm/alloc_slab.cpp +++ b/src/realm/alloc_slab.cpp @@ -16,15 +16,13 @@ * 
**************************************************************************/ -#include -#include -#include #include -#include -#include -#include #include +#include #include +#include +#include +#include #if REALM_DEBUG #include @@ -35,13 +33,13 @@ #include #endif -#include #include -#include -#include +#include #include +#include #include #include +#include #include using namespace realm; @@ -164,9 +162,6 @@ void SlabAlloc::detach(bool keep_file_open) noexcept // placed correctly (logically) after the end of the file. m_slabs.clear(); clear_freelists(); -#if REALM_ENABLE_ENCRYPTION - m_realm_file_info = nullptr; -#endif m_attach_mode = attach_None; } @@ -661,7 +656,7 @@ int SlabAlloc::get_committed_file_format_version() noexcept // if we have mapped a file, m_mappings will have at least one mapping and // the first will be to the start of the file. Don't come here, if we're // just attaching a buffer. They don't have mappings. - realm::util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); + util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); } } const Header& header = *reinterpret_cast(m_data); @@ -805,10 +800,6 @@ ref_type SlabAlloc::attach_file(const std::string& path, Config& cfg, util::Writ // the call below to set_encryption_key. m_file.set_encryption_key(cfg.encryption_key); - note_reader_start(this); - util::ScopeExit reader_end_guard([this]() noexcept { - note_reader_end(this); - }); size_t size = 0; // The size of a database file must not exceed what can be encoded in // size_t. @@ -840,26 +831,17 @@ ref_type SlabAlloc::attach_file(const std::string& path, Config& cfg, util::Writ if (size == 0) { if (REALM_UNLIKELY(cfg.read_only)) throw InvalidDatabase("Read-only access to empty Realm file", path); - - size_t initial_size = page_size(); - // exFAT does not allocate a unique id for the file until it is non-empty. 
It must be - // valid at this point because File::get_unique_id() is used to distinguish - // mappings_for_file in the encryption layer. So the prealloc() is required before - // interacting with the encryption layer in File::write(). - // Pre-alloc initial space - m_file.prealloc(initial_size); // Throws - // seek() back to the start of the file in preparation for writing the header - // This sequence of File operations is protected from races by - // DB::m_controlmutex, so we know we are the only ones operating on the file - m_file.seek(0); + // We want all non-streaming files to be a multiple of the page size + // to simplify memory mapping, so just pre-reserve the required space now + m_file.prealloc(page_size()); // Throws const char* data = reinterpret_cast(&empty_file_header); - m_file.write(data, sizeof empty_file_header); // Throws + m_file.write(0, data, sizeof empty_file_header); // Throws bool disable_sync = get_disable_sync_to_disk() || cfg.disable_sync; if (!disable_sync) m_file.sync(); // Throws - size = initial_size; + size = m_file.get_size(); } ref_type top_ref = read_and_validate_header(m_file, path, size, cfg.session_initiator, m_write_observer); @@ -883,12 +865,9 @@ ref_type SlabAlloc::attach_file(const std::string& path, Config& cfg, util::Writ update_reader_view(size); REALM_ASSERT(m_mappings.size()); m_data = m_mappings[0].primary_mapping.get_addr(); - realm::util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); + util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); dg.release(); // Do not detach fcg.release(); // Do not close -#if REALM_ENABLE_ENCRYPTION - m_realm_file_info = util::get_file_info_for_file(m_file); -#endif return top_ref; } @@ -905,40 +884,20 @@ void SlabAlloc::convert_from_streaming_form(ref_type top_ref) { File::Map
writable_map(m_file, File::access_ReadWrite, sizeof(Header)); // Throws Header& writable_header = *writable_map.get_addr(); - realm::util::encryption_read_barrier_for_write(writable_map, 0); + util::encryption_read_barrier(writable_map, 0); writable_header.m_top_ref[1] = top_ref; writable_header.m_file_format[1] = writable_header.m_file_format[0]; realm::util::encryption_write_barrier(writable_map, 0); writable_map.sync(); - realm::util::encryption_read_barrier_for_write(writable_map, 0); + util::encryption_read_barrier(writable_map, 0); writable_header.m_flags |= flags_SelectBit; realm::util::encryption_write_barrier(writable_map, 0); writable_map.sync(); - realm::util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); + util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); } } -void SlabAlloc::note_reader_start(const void* reader_id) -{ -#if REALM_ENABLE_ENCRYPTION - if (m_realm_file_info) - util::encryption_note_reader_start(*m_realm_file_info, reader_id); -#else - static_cast(reader_id); -#endif -} - -void SlabAlloc::note_reader_end(const void* reader_id) noexcept -{ -#if REALM_ENABLE_ENCRYPTION - if (m_realm_file_info) - util::encryption_note_reader_end(*m_realm_file_info, reader_id); -#else - static_cast(reader_id); -#endif -} - ref_type SlabAlloc::attach_buffer(const char* data, size_t size) { // ExceptionSafety: If this function throws, it must leave the allocator in @@ -1009,8 +968,8 @@ ref_type SlabAlloc::read_and_validate_header(util::File& file, const std::string { try { // we'll read header and (potentially) footer - File::Map map_header(file, File::access_ReadOnly, sizeof(Header), 0, write_observer); - realm::util::encryption_read_barrier(map_header, 0, sizeof(Header)); + File::Map map_header(file, File::access_ReadOnly, sizeof(Header), write_observer); + util::encryption_read_barrier(map_header, 0, sizeof(Header)); auto header = reinterpret_cast(map_header.get_addr()); File::Map map_footer; @@ 
-1020,12 +979,12 @@ ref_type SlabAlloc::read_and_validate_header(util::File& file, const std::string size_t footer_page_base = footer_ref & ~(page_size() - 1); size_t footer_offset = footer_ref - footer_page_base; map_footer = File::Map(file, footer_page_base, File::access_ReadOnly, - sizeof(StreamingFooter) + footer_offset, 0, write_observer); - realm::util::encryption_read_barrier(map_footer, footer_offset, sizeof(StreamingFooter)); + sizeof(StreamingFooter) + footer_offset, write_observer); + util::encryption_read_barrier(map_footer, footer_offset, sizeof(StreamingFooter)); footer = reinterpret_cast(map_footer.get_addr() + footer_offset); } - auto top_ref = validate_header(header, footer, size, path, file.get_encryption_key() != nullptr); // Throws + auto top_ref = validate_header(header, footer, size, path, file.get_encryption() != nullptr); // Throws if (session_initiator && is_file_on_streaming_form(*header)) { // Don't compare file format version fields as they are allowed to differ. 
@@ -1278,7 +1237,7 @@ void SlabAlloc::update_reader_view(size_t file_size) const size_t section_size = std::min(1 << section_shift, file_size - section_start_offset); if (section_size == (1 << section_shift)) { new_mappings.push_back({util::File::Map(m_file, section_start_offset, File::access_ReadOnly, - section_size, 0, m_write_observer)}); + section_size, m_write_observer)}); } else { new_mappings.push_back({util::File::Map()}); @@ -1291,7 +1250,7 @@ void SlabAlloc::update_reader_view(size_t file_size) throw std::bad_alloc(); } else { - new_mappings.back().primary_mapping.map(m_file, File::access_ReadOnly, section_size, 0, + new_mappings.back().primary_mapping.map(m_file, File::access_ReadOnly, section_size, section_start_offset, m_write_observer); } } @@ -1352,16 +1311,9 @@ void SlabAlloc::update_reader_view(size_t file_size) void SlabAlloc::schedule_refresh_of_outdated_encrypted_pages() { #if REALM_ENABLE_ENCRYPTION - // callers must already hold m_mapping_mutex - for (auto& e : m_mappings) { - if (auto m = e.primary_mapping.get_encrypted_mapping()) { - encryption_mark_pages_for_IV_check(m); - } - if (auto m = e.xover_mapping.get_encrypted_mapping()) { - encryption_mark_pages_for_IV_check(m); - } + if (auto encryption = m_file.get_encryption()) { + encryption->mark_data_as_possibly_stale(); } - // unsafe to do outside writing thread: verify(); #endif // REALM_ENABLE_ENCRYPTION } @@ -1457,7 +1409,7 @@ void SlabAlloc::get_or_add_xover_mapping(RefTranslation& txl, size_t index, size auto end_offset = file_offset + size; auto mapping_file_offset = file_offset & ~(_page_size - 1); auto minimal_mapping_size = end_offset - mapping_file_offset; - util::File::Map mapping(m_file, mapping_file_offset, File::access_ReadOnly, minimal_mapping_size, 0, + util::File::Map mapping(m_file, mapping_file_offset, File::access_ReadOnly, minimal_mapping_size, m_write_observer); map_entry->xover_mapping = std::move(mapping); } diff --git a/src/realm/alloc_slab.hpp 
b/src/realm/alloc_slab.hpp index e1ad8a0ca9f..df7cf413b9e 100644 --- a/src/realm/alloc_slab.hpp +++ b/src/realm/alloc_slab.hpp @@ -19,20 +19,18 @@ #ifndef REALM_ALLOC_SLAB_HPP #define REALM_ALLOC_SLAB_HPP +#include #include // unint8_t etc -#include #include -#include -#include #include +#include +#include #include #include #include -#include #include #include -#include #include namespace realm { @@ -41,10 +39,6 @@ namespace realm { class Group; class GroupWriter; -namespace util { -struct SharedFileInfo; -} // namespace util - /// Thrown by Group and DB constructors if the specified file /// (or memory buffer) does not appear to contain a valid Realm /// database. @@ -363,11 +357,6 @@ class SlabAlloc : public Allocator { /// Returns total amount of slab for all slab allocators static size_t get_total_slab_size() noexcept; - /// Hooks used to keep the encryption layer informed of the start and stop - /// of transactions. - void note_reader_start(const void* reader_id); - void note_reader_end(const void* reader_id) noexcept; - /// Read the header (and possibly footer) from the file, returning the top ref if it's valid and throwing /// InvalidDatabase otherwise. static ref_type read_and_validate_header(util::File& file, const std::string& path, size_t size, @@ -656,7 +645,6 @@ class SlabAlloc : public Allocator { uint64_t m_youngest_live_version = 1; std::mutex m_mapping_mutex; util::File m_file; - util::SharedFileInfo* m_realm_file_info = nullptr; // vectors where old mappings, are held from deletion to ensure translations are // kept open and ref->ptr translations work for other threads.. 
std::vector m_old_mappings; diff --git a/src/realm/db.cpp b/src/realm/db.cpp index ac2967bfdb9..87180472ec4 100644 --- a/src/realm/db.cpp +++ b/src/realm/db.cpp @@ -776,7 +776,7 @@ class DB::FileVersionManager final : public DB::VersionManager { auto new_size = static_cast(m_file.get_size()); REALM_ASSERT(new_size > size); size = new_size; - m_reader_map.remap(m_file, File::access_ReadWrite, size, File::map_NoSync); + m_reader_map.remap(m_file, File::access_ReadWrite, size); m_info = m_reader_map.get_addr(); std::lock_guard lock(m_mutex); @@ -820,7 +820,7 @@ class DB::FileVersionManager final : public DB::VersionManager { }; // adapter class for marking/observing encrypted writes -class DB::EncryptionMarkerObserver : public util::WriteMarker, public util::WriteObserver { +class DB::EncryptionMarkerObserver final : public util::WriteMarker, public util::WriteObserver { public: EncryptionMarkerObserver(DB::VersionManager& vm) : vm(vm) @@ -840,7 +840,7 @@ class DB::EncryptionMarkerObserver : public util::WriteMarker, public util::Writ } ++calls_since_last_writer_observed; constexpr size_t max_calls = 5; // an arbitrary handful, > 1 - return (calls_since_last_writer_observed >= max_calls); + return calls_since_last_writer_observed >= max_calls; } void mark(uint64_t pos) override { @@ -985,7 +985,7 @@ void DB::open(const std::string& path, const DBOptions& options) // get the exclusive lock because we hold it, and hence were // waiting for the shared lock instead, to observe and use an // old lock file. - m_file_map.map(m_file, File::access_ReadWrite, sizeof(SharedInfo), File::map_NoSync); // Throws + m_file_map.map(m_file, File::access_ReadWrite, sizeof(SharedInfo)); // Throws File::UnmapGuard fug(m_file_map); SharedInfo* info = m_file_map.get_addr(); @@ -1050,7 +1050,7 @@ void DB::open(const std::string& path, const DBOptions& options) // the SharedInfo struct, or less if the file is smaller. 
We know that // we have at least one byte, and that is enough to read the // `init_complete` flag. - m_file_map.map(m_file, File::access_ReadWrite, info_size, File::map_NoSync); + m_file_map.map(m_file, File::access_ReadWrite, info_size); File::UnmapGuard fug_1(m_file_map); SharedInfo* info = m_file_map.get_addr(); @@ -1187,21 +1187,11 @@ void DB::open(const std::string& path, const DBOptions& options) // From here on, if we fail in any way, we must detach the // allocator. SlabAlloc::DetachGuard alloc_detach_guard(alloc); - alloc.note_reader_start(this); - // must come after the alloc detach guard - auto handler = [this, &alloc]() noexcept { - alloc.note_reader_end(this); - }; - auto reader_end_guard = make_scope_exit(handler); // Check validity of top array (to give more meaningful errors // early) if (top_ref) { try { - alloc.note_reader_start(this); - auto reader_end_guard = make_scope_exit([&]() noexcept { - alloc.note_reader_end(this); - }); Array top{alloc}; top.init_from_ref(top_ref); Group::validate_top_array(top, alloc); @@ -1636,7 +1626,13 @@ bool DB::compact(bool bump_version_number, util::Optional output_en } auto info = m_info; Durability dura = Durability(info->durability); - const char* write_key = bool(output_encryption_key) ? *output_encryption_key : get_encryption_key(); + const char* write_key = nullptr; + if (output_encryption_key) { + write_key = *output_encryption_key; + } + else if (auto encryption = m_alloc.get_file().get_encryption()) { + write_key = encryption->get_key(); + } { std::unique_lock lock(m_controlmutex); // Throws auto t1 = std::chrono::steady_clock::now(); @@ -1681,9 +1677,7 @@ bool DB::compact(bool bump_version_number, util::Optional output_en catch (...) { // If writing the compact version failed in any way, delete the partially written file to clean up disk // space. This is so that we don't fail with 100% disk space used when compacting on a mostly full disk. 
- if (File::exists(tmp_path)) { - File::remove(tmp_path); - } + File::try_remove(tmp_path); throw; } // if we've written a file with a bumped version number, we need to update the lock file to match. @@ -1727,7 +1721,7 @@ bool DB::compact(bool bump_version_number, util::Optional output_en return true; } -void DB::write_copy(StringData path, const char* output_encryption_key) +void DB::write_copy(std::string_view path, const char* output_encryption_key) { auto tr = start_read(); if (auto hist = tr->get_history()) { diff --git a/src/realm/db.hpp b/src/realm/db.hpp index af8fc23633c..0b275c3d45e 100644 --- a/src/realm/db.hpp +++ b/src/realm/db.hpp @@ -198,11 +198,6 @@ class DB : public std::enable_shared_from_this { return m_db_path; } - const char* get_encryption_key() const noexcept - { - return m_alloc.m_file.get_encryption_key(); - } - #ifdef REALM_DEBUG /// Deprecated method, only called from a unit test /// @@ -335,7 +330,7 @@ class DB : public std::enable_shared_from_this { bool compact(bool bump_version_number = false, util::Optional output_encryption_key = util::none) REQUIRES(!m_mutex); - void write_copy(StringData path, const char* output_encryption_key) REQUIRES(!m_mutex); + void write_copy(std::string_view path, const char* output_encryption_key) REQUIRES(!m_mutex); #ifdef REALM_DEBUG void test_ringbuf(); diff --git a/src/realm/exec/realm_decrypt.cpp b/src/realm/exec/realm_decrypt.cpp index 134bc0d1818..2c647fb9dac 100644 --- a/src/realm/exec/realm_decrypt.cpp +++ b/src/realm/exec/realm_decrypt.cpp @@ -13,13 +13,13 @@ constexpr size_t block_size = 4096; int main(int argc, const char* argv[]) { if (argc > 3) { - const uint8_t* key_ptr = nullptr; + const char* key_ptr = nullptr; char key[64]; std::string outfilename = "out.realm"; for (int curr_arg = 1; curr_arg < argc; curr_arg++) { if (strcmp(argv[curr_arg], "--key") == 0) { hex_to_bin(argv[curr_arg + 1], key); - key_ptr = reinterpret_cast(key); + key_ptr = key; curr_arg++; } else if 
(strcmp(argv[curr_arg], "--out") == 0) { @@ -36,7 +36,7 @@ int main(int argc, const char* argv[]) auto size = (off_t)file.get_size(); decltype(size) pos = 0; util::AESCryptor cryptor(key_ptr); - cryptor.set_file_size(size); + cryptor.set_data_size(size); while (pos < size) { char buf[block_size]; cryptor.try_read_block(file.get_descriptor(), pos, buf); diff --git a/src/realm/group.cpp b/src/realm/group.cpp index a0f2f5deac5..d90e53bcdc6 100644 --- a/src/realm/group.cpp +++ b/src/realm/group.cpp @@ -985,10 +985,6 @@ void Group::write(File& file, const char* encryption_key, uint_fast64_t version_ file.set_encryption_key(encryption_key); - // Force the file system to allocate a node so we get a stable unique id. - // See File::get_unique_id(). This is used to distinguish encrypted mappings. - file.resize(1); - // The aim is that the buffer size should be at least 1/256 of needed size but less than 64 Mb constexpr size_t upper_bound = 64 * 1024 * 1024; size_t min_space = std::min(get_used_space() >> 8, upper_bound); diff --git a/src/realm/group_writer.cpp b/src/realm/group_writer.cpp index 9243d23e0b6..2e6651101fa 100644 --- a/src/realm/group_writer.cpp +++ b/src/realm/group_writer.cpp @@ -151,7 +151,7 @@ bool WriteWindowMgr::MapWindow::extends_to_match(util::File& f, ref_type start_r size_t window_size = get_window_size(f, start_ref, size); m_map.sync(); m_map.unmap(); - m_map.map(f, File::access_ReadWrite, window_size, 0, m_base_ref); + m_map.map(f, File::access_ReadWrite, window_size, m_base_ref); return true; } @@ -161,7 +161,7 @@ WriteWindowMgr::MapWindow::MapWindow(size_t alignment, util::File& f, ref_type s { m_base_ref = aligned_to_mmap_block(start_ref); size_t window_size = get_window_size(f, start_ref, size); - m_map.map(f, File::access_ReadWrite, window_size, 0, m_base_ref); + m_map.map(f, File::access_ReadWrite, window_size, m_base_ref); #if REALM_ENABLE_ENCRYPTION if (auto p = m_map.get_encrypted_mapping()) p->set_marker(write_marker); @@ -194,7 +194,7 @@ 
char* WriteWindowMgr::MapWindow::translate(ref_type ref) void WriteWindowMgr::MapWindow::encryption_read_barrier(void* start_addr, size_t size) { - realm::util::encryption_read_barrier_for_write(start_addr, size, m_map.get_encrypted_mapping()); + util::encryption_read_barrier_for_write(start_addr, size, m_map.get_encrypted_mapping()); } void WriteWindowMgr::MapWindow::encryption_write_barrier(void* start_addr, size_t size) diff --git a/src/realm/group_writer.hpp b/src/realm/group_writer.hpp index 438879114c6..c0c59145ffa 100644 --- a/src/realm/group_writer.hpp +++ b/src/realm/group_writer.hpp @@ -35,6 +35,9 @@ namespace realm { // Pre-declarations class Transaction; class SlabAlloc; +namespace util { +class WriteMarker; +} class Reachable { public: diff --git a/src/realm/node_header.hpp b/src/realm/node_header.hpp index 0251c796478..453f6100eaf 100644 --- a/src/realm/node_header.hpp +++ b/src/realm/node_header.hpp @@ -206,7 +206,6 @@ class NodeHeader { uint_least8_t width = get_width_from_header(header); WidthType wtype = get_wtype_from_header(header); size_t num_bytes = calc_byte_size(wtype, size, width); - return num_bytes; } diff --git a/src/realm/object-store/impl/realm_coordinator.cpp b/src/realm/object-store/impl/realm_coordinator.cpp index 2131a469ee7..eadd7b1638d 100644 --- a/src/realm/object-store/impl/realm_coordinator.cpp +++ b/src/realm/object-store/impl/realm_coordinator.cpp @@ -1258,7 +1258,7 @@ bool RealmCoordinator::compact() return m_db->compact(); } -void RealmCoordinator::write_copy(StringData path, const char* key) +void RealmCoordinator::write_copy(std::string_view path, const char* key) { m_db->write_copy(path, key); } diff --git a/src/realm/object-store/impl/realm_coordinator.hpp b/src/realm/object-store/impl/realm_coordinator.hpp index 3608e9c8596..3485cbfff7c 100644 --- a/src/realm/object-store/impl/realm_coordinator.hpp +++ b/src/realm/object-store/impl/realm_coordinator.hpp @@ -200,7 +200,7 @@ class RealmCoordinator : public 
std::enable_shared_from_this, void close(); bool compact(); - void write_copy(StringData path, const char* key); + void write_copy(std::string_view path, const char* key); // Close the DB, delete the file, and then reopen it. This operation is *not* // implemented in a safe manner and will only work in fairly specific circumstances diff --git a/src/realm/query_engine.hpp b/src/realm/query_engine.hpp index 8b7ecf2d1e8..6a91c717cf5 100644 --- a/src/realm/query_engine.hpp +++ b/src/realm/query_engine.hpp @@ -91,6 +91,7 @@ TConditionValue: Type of values in condition column. That is, int64_t, float, #include #include #include +#include #include #include diff --git a/src/realm/sync/noinst/client_impl_base.hpp b/src/realm/sync/noinst/client_impl_base.hpp index 6dc55bfb386..0feb5069f17 100644 --- a/src/realm/sync/noinst/client_impl_base.hpp +++ b/src/realm/sync/noinst/client_impl_base.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include namespace realm::sync { diff --git a/src/realm/sync/noinst/server/server.cpp b/src/realm/sync/noinst/server/server.cpp index aeac91412f5..e317076d0d8 100644 --- a/src/realm/sync/noinst/server/server.cpp +++ b/src/realm/sync/noinst/server/server.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/src/realm/sync/tools/print_changeset.cpp b/src/realm/sync/tools/print_changeset.cpp index 3412bc0ec77..d69839ce57c 100644 --- a/src/realm/sync/tools/print_changeset.cpp +++ b/src/realm/sync/tools/print_changeset.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include diff --git a/src/realm/transaction.cpp b/src/realm/transaction.cpp index 6714a6c1b60..9e93875923f 100644 --- a/src/realm/transaction.cpp +++ b/src/realm/transaction.cpp @@ -171,7 +171,6 @@ Transaction::Transaction(DBRef _db, SlabAlloc* alloc, DB::ReadLockInfo& rli, DB: bool writable = stage == DB::transact_Writing; m_transact_stage = DB::transact_Ready; set_transact_stage(stage); - 
m_alloc.note_reader_start(this); attach_shared(m_read_lock.m_top_ref, m_read_lock.m_file_size, writable, VersionID{rli.m_version, rli.m_reader_idx}); if (db->m_logger) { @@ -827,7 +826,6 @@ void Transaction::do_end_read() noexcept } db->release_read_lock(m_read_lock); - m_alloc.note_reader_end(this); set_transact_stage(DB::transact_Ready); // reset the std::shared_ptr to allow the DB object to release resources // as early as possible. @@ -851,7 +849,6 @@ void Transaction::close_read_with_lock() m_oldest_version_not_persisted->m_file_size); db->do_release_read_lock(m_read_lock); - m_alloc.note_reader_end(this); set_transact_stage(DB::transact_Ready); // reset the std::shared_ptr to allow the DB object to release resources // as early as possible. diff --git a/src/realm/util/aes_cryptor.hpp b/src/realm/util/aes_cryptor.hpp index d9f8da87fc2..24f5a448b74 100644 --- a/src/realm/util/aes_cryptor.hpp +++ b/src/realm/util/aes_cryptor.hpp @@ -19,28 +19,31 @@ #ifndef REALM_AES_CRYPTOR_HPP #define REALM_AES_CRYPTOR_HPP +#include +#include + #include #include -#include -#include #include +#include #include -#include -#include - namespace realm::util { class WriteObserver { public: virtual bool no_concurrent_writer_seen() = 0; - virtual ~WriteObserver() {} + +protected: + ~WriteObserver() = default; }; class WriteMarker { public: virtual void mark(uint64_t page_offset) = 0; virtual void unmark() = 0; - virtual ~WriteMarker() {} + +protected: + ~WriteMarker() = default; }; } // namespace realm::util @@ -60,25 +63,26 @@ class WriteMarker { namespace realm::util { -struct iv_table; +struct IVTable; class EncryptedFileMapping; -enum class IVRefreshState { UpToDate, RequiresRefresh }; - class AESCryptor { public: - AESCryptor(const uint8_t* key); + AESCryptor(const char* key); ~AESCryptor() noexcept; - void set_file_size(off_t new_size); + void set_data_size(File::SizeType new_size); - size_t read(FileDesc fd, off_t pos, char* dst, size_t size, WriteObserver* observer = nullptr); 
- void try_read_block(FileDesc fd, off_t pos, char* dst) noexcept; - void write(FileDesc fd, off_t pos, const char* src, size_t size, WriteMarker* marker = nullptr) noexcept; - util::FlatMap refresh_ivs(FileDesc fd, off_t data_pos, size_t page_ndx_in_file_expected, - size_t end_page_ndx_in_file); + size_t read(FileDesc fd, File::SizeType pos, char* dst, size_t size, WriteObserver* observer = nullptr); + void try_read_block(FileDesc fd, File::SizeType pos, char* dst) noexcept; + void write(FileDesc fd, File::SizeType pos, const char* src, size_t size, WriteMarker* marker = nullptr) noexcept; + bool refresh_iv(FileDesc fd, size_t page_ndx); + void invalidate_ivs() noexcept; - void check_key(const uint8_t* key); + const char* get_key() const noexcept + { + return reinterpret_cast(m_key.data()); + } private: enum EncryptionMode { @@ -105,36 +109,17 @@ class AESCryptor { EVP_CIPHER_CTX* m_ctx; #endif - std::array m_aesKey; - std::array m_hmacKey; - std::vector m_iv_buffer; + const std::array m_key; + std::vector m_iv_buffer; + std::vector m_iv_buffer_cache; std::unique_ptr m_rw_buffer; std::unique_ptr m_dst_buffer; - std::vector m_iv_buffer_cache; bool check_hmac(const void* data, size_t len, const std::array& hmac) const; - void crypt(EncryptionMode mode, off_t pos, char* dst, const char* src, const char* stored_iv) noexcept; - iv_table& get_iv_table(FileDesc fd, off_t data_pos, IVLookupMode mode = IVLookupMode::UseCache) noexcept; + void crypt(EncryptionMode mode, File::SizeType pos, char* dst, const char* src, const char* stored_iv) noexcept; + IVTable& get_iv_table(FileDesc fd, File::SizeType data_pos, IVLookupMode mode = IVLookupMode::UseCache) noexcept; void handle_error(); -}; - -struct ReaderInfo { - const void* reader_ID; - uint64_t version; -}; - -struct SharedFileInfo { - FileDesc fd; - AESCryptor cryptor; - std::vector mappings; - uint64_t last_scanned_version = 0; - uint64_t current_version = 0; - size_t num_decrypted_pages = 0; - size_t num_reclaimed_pages 
= 0; - size_t progress_index = 0; - std::vector readers; - - SharedFileInfo(const uint8_t* key); + void read_iv_block(FileDesc fd, File::SizeType data_pos); }; } // namespace realm::util diff --git a/src/realm/util/encrypted_file_mapping.cpp b/src/realm/util/encrypted_file_mapping.cpp index 8106b96336e..520366b66e5 100644 --- a/src/realm/util/encrypted_file_mapping.cpp +++ b/src/realm/util/encrypted_file_mapping.cpp @@ -18,6 +18,7 @@ #include +#include #include #include @@ -25,26 +26,23 @@ #if REALM_ENABLE_ENCRYPTION #include #include -#include #include #include +#include -#include #include +#include #include -#include +#include +#include +#include #include -#include #include #ifdef REALM_DEBUG #include #endif -#include -#include -#include - #if defined(_WIN32) #include #include @@ -52,128 +50,223 @@ #else #include #include -#include #endif namespace realm::util { -SharedFileInfo::SharedFileInfo(const uint8_t* key) - : cryptor(key) -{ -} - -// We have the following constraints here: +// When Realm's file encryption was originally designed, we had the constraint +// that all encryption and decryption had to happen in aligned system page size +// sized blocks due to the use of signal handlers to lazily decrypt data and +// track where writes occur. This is no longer the case, but may still help +// explain why the file layout looks the way it does. +// +// Encryption is performed on 4096 byte data pages. Each group of 64 data pages +// is arranged into a "block", which has a 4096 byte header containing the IVs +// and HMACs for the following pages. Each page has *two* IVs and HMACs stored. +// iv2/hmac2 contain the values which were last used to successfully decrypt +// the page, while iv1/hmac1 are the values which were last used to encrypt the +// page. +// +// Writing new encrypted data has the following steps: +// +// 1. Copy iv1/hmac1 to iv2/hmac2 in the IVTable +// 2. Increment iv1 +// 3. Encrypt the page in memory +// 4.
Compute the hmac for the new encrypted data. +// 5. If the hmac matches the previous hmac, goto 2 (this will not ever actually happen) +// 6. Write the new IVTable for the page. +// 7. fsync() (or F_BARRIERFSYNC on Apple) +// 8. Write the new encrypted data +// +// If we are interrupted before #6, no i/o has happened and the data on disk is +// fine. If we are interrupted between #6 and #8, then when we next try to read +// the page the hmac check using hmac1 will fail, but the check using hmac2 +// will succeed and we will be able to read the old data. We then copy +// iv2/hmac2 back to the active fields and continue as normal. +// +// This scheme breaks if we have a partial write of the 4k page. This is +// impossible with SSDs, which can only write in their atomic block size, and +// it would be extremely unusual for that to be smaller than 4k. It may be a +// problem when running on HDDs, though. // -// 1. When writing, we only know which 4k page is dirty, and not what bytes -// within the page are dirty, so we always have to write in 4k blocks. -// 2. Pages being written need to be entirely within an 8k-aligned block to -// ensure that they're written to the hardware in atomic blocks. -// 3. We need to store the IV used for each 4k page somewhere, so that we can -// ensure that we never reuse an IV (and still be decryptable). +// Reading from an encrypted file is done by creating a mapping and then +// calling `read_barrier(addr, size)` to mark the section of the mapping which +// needs to be populated. This decrypts each of the pages which cover that +// range and places the plaintext into memory. If any of the pages were already +// decrypted, this is a no-op that skips reading anything and just assumes that +// the data was up-to-date. // -// Because pages need to be aligned, we can't just prepend the IV to each page, -// or we'd have to double the size of the file (as the rest of the 4k block -// containing the IV would not be usable). 
Writing the IVs to a different part -// of the file from the data results in them not being in the same 8k block, and -// so it is possible that only the IV or only the data actually gets updated on -// disk. We deal with this by storing four pieces of data about each page: the -// hash of the encrypted data, the current IV, the hash of the previous encrypted -// data, and the previous IV. To write, we encrypt the data, hash the ciphertext, -// then write the new IV/ciphertext hash, fsync(), and then write the new -// ciphertext. This ensures that if an error occurs between writing the IV and -// the ciphertext, we can still determine that we should use the old IV, since -// the ciphertext's hash will match the old ciphertext. - -// This produces a file on disk with the following layout: -// 4k block of metadata (up to 64 iv_table instances stored here) -// 64 * 4k blocks of data (up to 262144 bytes of data are stored here) -// 4k block of metadata -// 64 * 4k blocks of data -// ... - -struct iv_table { +// Writing is done with `read_barrier(addr, size, true)` before performing any +// writes to mark the range as writeable, and then `write_barrier(addr, size)` +// to mark bytes which were actually written to. `write_barrier()` eagerly +// copies all of the written bytes to any other active mappings on the same +// file which have those pages decrypted in memory. This is spooky +// threading-wise, and is only made safe by Realm's MVCC semantics - if we're +// writing to a section of the file we know that no one can be legally reading +// those exact bytes, and we must be writing to different bytes in the same +// page. This copying makes it so that we never have to recheck the disk; once +// we have read and decrypted a page for a mapping, that page is forevermore +// valid and up-to-date. +// +// All dirty data is kept buffered in memory until `flush()` is called. 
+// +// In multi-process scenarios (or just multiple File instances for a single +// file in a single process, which doesn't happen when using the public API +// normally), eagerly keeping decrypted pages up to date is impossible, and we +// sometimes need to recheck the disk. Here we once again take advantage of +// Realm being MVCC with discrete points where we may need to see newer +// versions of the data on disk. When the reader view is updated, if there have +// been any external writes to the file SlabAlloc calls +// `mark_pages_for_iv_check()`, which puts all up-to-date pages into a +// potentially-stale state. The next time each page is accessed, we reread the +// IVTable for that page. If it's the same as the IVTable for the plaintext we +// have in memory then the page is marked as being up-to-date, and if it's +// different we reread the page. +// +// Another source of complexity in multiprocess scenarios is that while we +// assume that the actual i/o is atomic in 4k chunks, writing to the in-memory +// buffers is distinctly not atomic. One process reading from a memory mapping +// while another process is writing to that position in the file can see +// incomplete writes. Rather than doing page-level locking, we assume that this +// will be very rare and perform optimistic unlocked reads. If decryption fails +// and we are in a potentially-multiprocess scenario we retry the read several +// times before reporting an error. 
+ +struct IVTable { uint32_t iv1 = 0; std::array hmac1 = {}; uint32_t iv2 = 0; std::array hmac2 = {}; - bool operator==(const iv_table& other) const + bool operator==(const IVTable& other) const { return iv1 == other.iv1 && iv2 == other.iv2 && hmac1 == other.hmac1 && hmac2 == other.hmac2; } - bool operator!=(const iv_table& other) const + bool operator!=(const IVTable& other) const { return !(*this == other); } }; +// We read this via memcpy and need it to be packed +static_assert(sizeof(IVTable) == 64); namespace { -const int aes_block_size = 16; -const size_t block_size = 4096; - -const size_t metadata_size = sizeof(iv_table); -const size_t blocks_per_metadata_block = block_size / metadata_size; +constexpr uint8_t aes_block_size = 16; +constexpr uint16_t encryption_page_size = 4096; +constexpr uint8_t metadata_size = sizeof(IVTable); +constexpr uint8_t pages_per_block = encryption_page_size / metadata_size; static_assert(metadata_size == 64, "changing the size of the metadata breaks compatibility with existing Realm files"); +using SizeType = File::SizeType; + +template +To checked_cast(From from) +{ + To to; + if (REALM_UNLIKELY(int_cast_with_overflow_detect(from, to))) { + throw MaximumFileSizeExceeded(util::format("File size %1 is larger than can be represented", from)); + } + return to; +} + +// Overflows when converting from file positions (always 64-bits) to size_t +// (sometimes 32-bits) should all be caught by set_file_size() +template +constexpr To assert_cast(From from) +{ + REALM_ASSERT_DEBUG(!int_cast_has_overflow(from)); + return static_cast(from); +} + +// Index of page which contains `data_pos` +constexpr size_t page_index(SizeType data_pos) noexcept +{ + SizeType index = data_pos / encryption_page_size; + return assert_cast(index); +} + +// Number of pages required to store `size` bytes +constexpr size_t page_count(SizeType size) noexcept +{ + return assert_cast((size + encryption_page_size - 1) / encryption_page_size); +} + +// Index of the 
metadata block which contains `data_pos` +constexpr size_t block_index(SizeType data_pos) noexcept +{ + return page_index(data_pos) / pages_per_block; +} + +// Number of metadata blocks required to store `size` bytes +constexpr size_t block_count(SizeType data_size) noexcept +{ + return (page_count(data_size) + pages_per_block - 1) / pages_per_block; +} + // map an offset in the data to the actual location in the file -template -Int real_offset(Int pos) +SizeType data_pos_to_file_pos(SizeType data_pos) { - REALM_ASSERT(pos >= 0); - const size_t index = static_cast(pos) / block_size; - const size_t metadata_page_count = index / blocks_per_metadata_block + 1; - return Int(pos + metadata_page_count * block_size); + REALM_ASSERT(data_pos >= 0); + return data_pos + (block_index(data_pos) + 1) * encryption_page_size; } // map a location in the file to the offset in the data -template -Int fake_offset(Int pos) +SizeType file_pos_to_data_pos(SizeType file_pos) { - REALM_ASSERT(pos >= 0); - const size_t index = static_cast(pos) / block_size; - const size_t metadata_page_count = (index + blocks_per_metadata_block) / (blocks_per_metadata_block + 1); - return pos - metadata_page_count * block_size; + REALM_ASSERT(file_pos >= 0); + const size_t metadata_page_count = (page_index(file_pos) + pages_per_block) / (pages_per_block + 1); + return file_pos - metadata_page_count * encryption_page_size; } -// get the location of the iv_table for the given data (not file) position -off_t iv_table_pos(off_t pos) +// get the location of the IVTable for the given data (not file) position +SizeType iv_table_pos(SizeType data_pos) { - REALM_ASSERT(pos >= 0); - const size_t index = static_cast(pos) / block_size; - const size_t metadata_block = index / blocks_per_metadata_block; - const size_t metadata_index = index & (blocks_per_metadata_block - 1); - return off_t(metadata_block * (blocks_per_metadata_block + 1) * block_size + metadata_index * metadata_size); + REALM_ASSERT(data_pos >= 0); + 
const size_t index = page_index(data_pos); + const size_t metadata_block = block_index(data_pos); + const size_t metadata_index = index & (pages_per_block - 1); + return metadata_block * (pages_per_block + 1) * encryption_page_size + metadata_index * metadata_size; } -void check_write(FileDesc fd, off_t pos, const void* data, size_t len) +// get the file location of the IVTable block for the given data (not file) position +SizeType iv_table_block_pos(SizeType data_pos) { - uint64_t orig = File::get_file_pos(fd); - File::seek_static(fd, pos); - File::write_static(fd, static_cast(data), len); - File::seek_static(fd, orig); + REALM_ASSERT(data_pos >= 0); + return block_index(data_pos) * (pages_per_block + 1) * encryption_page_size; } -size_t check_read(FileDesc fd, off_t pos, void* dst, size_t len) +constexpr size_t iv_table_size(SizeType data_pos) { - uint64_t orig = File::get_file_pos(fd); - File::seek_static(fd, pos); - size_t ret = File::read_static(fd, static_cast(dst), len); - File::seek_static(fd, orig); - return ret; + return block_count(data_pos) * pages_per_block; } -} // anonymous namespace +// not actually checked any more +size_t check_read(FileDesc fd, SizeType pos, void* dst) +{ + return File::read_static(fd, pos, static_cast(dst), encryption_page_size); +} // first block is iv data, second page is data -static_assert(c_min_encrypted_file_size == 2 * block_size, +static_assert(c_min_encrypted_file_size == 2 * encryption_page_size, "chaging the block size breaks encrypted file portability"); -AESCryptor::AESCryptor(const uint8_t* key) - : m_rw_buffer(new char[block_size]) - , m_dst_buffer(new char[block_size]) +template +constexpr std::array to_array_impl(const T* ptr, std::index_sequence) { - memcpy(m_aesKey.data(), key, 32); - memcpy(m_hmacKey.data(), key + 32, 32); + return {{ptr[I]...}}; +} +template +constexpr auto to_array(const T* ptr) +{ + return to_array_impl(ptr, std::make_index_sequence{}); +} +} // anonymous namespace + 
+AESCryptor::AESCryptor(const char* key) + : m_key(to_array(reinterpret_cast(key))) + , m_rw_buffer(new char[encryption_page_size]) + , m_dst_buffer(new char[encryption_page_size]) +{ #if REALM_PLATFORM_APPLE // A random iv is passed to CCCryptorReset. This iv is *not used* by Realm; we set it manually prior to // each call to BCryptEncrypt() and BCryptDecrypt(). We pass this random iv as an attempt to @@ -214,56 +307,50 @@ AESCryptor::~AESCryptor() noexcept #endif } -void AESCryptor::check_key(const uint8_t* key) -{ - if (memcmp(m_aesKey.data(), key, 32) != 0 || memcmp(m_hmacKey.data(), key + 32, 32) != 0) - throw DecryptionFailed(); -} - void AESCryptor::handle_error() { throw std::runtime_error("Error occurred in encryption layer"); } -void AESCryptor::set_file_size(off_t new_size) +void AESCryptor::set_data_size(SizeType new_data_size) { - REALM_ASSERT(new_size >= 0 && !int_cast_has_overflow(new_size)); - size_t new_size_casted = size_t(new_size); - size_t block_count = (new_size_casted + block_size - 1) / block_size; - m_iv_buffer.reserve((block_count + blocks_per_metadata_block - 1) & ~(blocks_per_metadata_block - 1)); + REALM_ASSERT(new_data_size >= 0); + m_iv_buffer.reserve(iv_table_size(new_data_size)); m_iv_buffer_cache.reserve(m_iv_buffer.capacity()); } -iv_table& AESCryptor::get_iv_table(FileDesc fd, off_t data_pos, IVLookupMode mode) noexcept +IVTable& AESCryptor::get_iv_table(FileDesc fd, SizeType data_pos, IVLookupMode mode) noexcept { - REALM_ASSERT(!int_cast_has_overflow(data_pos)); - size_t data_pos_casted = size_t(data_pos); - size_t idx = data_pos_casted / block_size; - if (mode == IVLookupMode::UseCache && idx < m_iv_buffer.size()) - return m_iv_buffer[idx]; - - size_t block_start = std::min(m_iv_buffer.size(), (idx / blocks_per_metadata_block) * blocks_per_metadata_block); - size_t block_end = 1 + idx / blocks_per_metadata_block; - REALM_ASSERT(block_end * blocks_per_metadata_block <= m_iv_buffer.capacity()); // not safe to allocate here - if 
(block_end * blocks_per_metadata_block > m_iv_buffer.size()) { - m_iv_buffer.resize(block_end * blocks_per_metadata_block); - m_iv_buffer_cache.resize(m_iv_buffer.size()); + size_t idx = page_index(data_pos); + REALM_ASSERT(idx < m_iv_buffer.capacity()); // required space should have been preallocated + // FIXME: reread on zero is bad; need to track this separately + if (mode != IVLookupMode::UseCache || idx >= m_iv_buffer.size() || m_iv_buffer[idx].iv1 == 0) { + read_iv_block(fd, data_pos); } + m_iv_buffer_cache[idx] = m_iv_buffer[idx]; + return m_iv_buffer[idx]; +} - for (size_t i = block_start; i < block_end * blocks_per_metadata_block; i += blocks_per_metadata_block) { - off_t iv_pos = iv_table_pos(off_t(i * block_size)); - size_t bytes = check_read(fd, iv_pos, &m_iv_buffer[i], block_size); - if (bytes < block_size) - break; // rest is zero-filled by resize() +// We always read an entire block of IVTables at a time rather than just the +// one we need as it's likely to take about the same amount of time up front +// and greatly reduce the total number of read calls we have to make +void AESCryptor::read_iv_block(FileDesc fd, SizeType data_pos) +{ + size_t idx = block_index(data_pos) * pages_per_block; + if (idx + pages_per_block > m_iv_buffer.size()) { + m_iv_buffer.resize(idx + pages_per_block, IVTable{}); + if (m_iv_buffer_cache.size() < m_iv_buffer.size()) { + m_iv_buffer_cache.resize(m_iv_buffer.size(), IVTable{}); + } } - - return m_iv_buffer[idx]; + SizeType iv_pos = iv_table_block_pos(data_pos); + check_read(fd, iv_pos, &m_iv_buffer[idx]); } bool AESCryptor::check_hmac(const void* src, size_t len, const std::array& hmac) const { std::array buffer; - hmac_sha224(Span(reinterpret_cast(src), len), buffer, m_hmacKey); + hmac_sha224(Span(reinterpret_cast(src), len), buffer, Span(m_key).sub_span<32>()); // Constant-time memcmp to avoid timing attacks uint8_t result = 0; @@ -272,81 +359,47 @@ bool AESCryptor::check_hmac(const void* src, size_t len, const 
std::array -AESCryptor::refresh_ivs(FileDesc fd, off_t data_pos, size_t page_ndx_in_file_expected, size_t end_page_ndx_in_file) +bool AESCryptor::refresh_iv(FileDesc fd, size_t page_ndx) { - REALM_ASSERT_EX(page_ndx_in_file_expected < end_page_ndx_in_file, page_ndx_in_file_expected, - end_page_ndx_in_file); - // the indices returned are page indices, not block indices - util::FlatMap page_states; - - REALM_ASSERT(!int_cast_has_overflow(data_pos)); - size_t data_pos_casted = size_t(data_pos); - // the call to get_iv_table() below reads in all ivs in a chunk with size = blocks_per_metadata_block - // so we will know if any iv in this chunk has changed - const size_t block_ndx_refresh_start = - ((data_pos_casted / block_size) / blocks_per_metadata_block) * blocks_per_metadata_block; - const size_t block_ndx_refresh_end = block_ndx_refresh_start + blocks_per_metadata_block; - REALM_ASSERT_EX(block_ndx_refresh_end <= m_iv_buffer.size(), block_ndx_refresh_start, block_ndx_refresh_end, - m_iv_buffer.size()); - - get_iv_table(fd, data_pos, IVLookupMode::Refetch); - - size_t number_of_identical_blocks = 0; - size_t last_page_index = -1; - constexpr iv_table uninitialized_iv = {}; - // there may be multiple iv blocks per page so all must be unchanged for a page - // to be considered unchanged. If any one of the ivs has changed then the entire page - // must be refreshed. Eg. with a page_size() of 16k and block_size of 4k, if any of - // the 4 ivs in that page are different, the entire page must be refreshed. 
- const size_t num_required_identical_blocks_for_page_match = page_size() / block_size; - for (size_t block_ndx = block_ndx_refresh_start; block_ndx < block_ndx_refresh_end; ++block_ndx) { - size_t page_index = block_ndx * block_size / page_size(); - if (page_index >= end_page_ndx_in_file) { - break; - } - if (page_index != last_page_index) { - number_of_identical_blocks = 0; - } - if (m_iv_buffer_cache[block_ndx] != m_iv_buffer[block_ndx] || m_iv_buffer[block_ndx] == uninitialized_iv) { - page_states[page_index] = IVRefreshState::RequiresRefresh; - m_iv_buffer_cache[block_ndx] = m_iv_buffer[block_ndx]; - } - else { - ++number_of_identical_blocks; - } - if (number_of_identical_blocks >= num_required_identical_blocks_for_page_match) { - REALM_ASSERT_EX(page_states.count(page_index) == 0, page_index, page_ndx_in_file_expected); - page_states[page_index] = IVRefreshState::UpToDate; - } - last_page_index = page_index; + REALM_ASSERT(page_ndx < m_iv_buffer.capacity()); + // FIXME: reread on zero is bad; need to track this separately + if (page_ndx >= m_iv_buffer.size() || m_iv_buffer[page_ndx].iv1 == 0) { + read_iv_block(fd, SizeType(page_ndx) * encryption_page_size); + } + + if (m_iv_buffer[page_ndx] != m_iv_buffer_cache[page_ndx]) { + m_iv_buffer_cache[page_ndx] = m_iv_buffer[page_ndx]; + return true; } - REALM_ASSERT_EX(page_states.count(page_ndx_in_file_expected) == 1, page_states.size(), page_ndx_in_file_expected, - block_ndx_refresh_start, blocks_per_metadata_block); - return page_states; + return false; +} + +void AESCryptor::invalidate_ivs() noexcept +{ + m_iv_buffer.clear(); } -size_t AESCryptor::read(FileDesc fd, off_t pos, char* dst, size_t size, WriteObserver* observer) +size_t AESCryptor::read(FileDesc fd, SizeType pos, char* dst, size_t size, WriteObserver* observer) { - REALM_ASSERT_EX(size % block_size == 0, size, block_size); - // We need to throw DecryptionFailed if the key is incorrect or there has been a corruption in the data but - // not in a 
reader starvation scenario where a different process is writing pages and ivs faster than we can read - // them. We also want to optimize for a single process writer since in that case all the cached ivs are correct. - // To do this, we first attempt to use the cached IV, and if it is invalid, read from disk again. During reader - // starvation, the just read IV could already be out of date with the data page, so continue trying to read until - // a match is found (for up to 5 seconds before giving up entirely). + REALM_ASSERT_EX(size % encryption_page_size == 0, size, encryption_page_size); + // We need to throw DecryptionFailed if the key is incorrect or there has + // been a corruption in the data but not in a reader starvation scenario + // where a different process is writing pages and ivs faster than we can + // read them. We also want to optimize for a single process writer since in + // that case all the cached ivs are correct. To do this, we first attempt + // to use the cached IV, and if it is invalid, read from disk again. During + // reader starvation, the just read IV could already be out of date with + // the data page, so continue trying to read until a match is found (for up + // to 5 seconds before giving up entirely). size_t retry_count = 0; - std::pair last_iv_and_data_hash; + std::pair last_iv_and_data_hash; auto retry_start_time = std::chrono::steady_clock::now(); size_t num_identical_reads = 1; - auto retry = [&](std::string_view page_data, const iv_table& iv, const char* debug_from) { + auto retry = [&](std::string_view page_data, const IVTable& iv, const char* debug_from) { constexpr auto max_retry_period = std::chrono::seconds(5); auto elapsed = std::chrono::steady_clock::now() - retry_start_time; - bool we_are_alone = true; // not having an observer set means that we're alone. 
(or should mean it) - if (observer) { - we_are_alone = observer->no_concurrent_writer_seen(); - } + bool we_are_alone = !observer || observer->no_concurrent_writer_seen(); if (we_are_alone || (retry_count > 0 && elapsed > max_retry_period)) { auto str = util::format("unable to decrypt after %1 seconds (retry_count=%2, from=%3, size=%4)", std::chrono::duration_cast(elapsed).count(), retry_count, @@ -354,29 +407,26 @@ size_t AESCryptor::read(FileDesc fd, off_t pos, char* dst, size_t size, WriteObs // std::cerr << std::endl << "*Timeout: " << str << std::endl; throw DecryptionFailed(str); } - else { - // don't wait on the first retry as we want to optimize the case where the first read - // from the iv table cache didn't validate and we are fetching the iv block from disk for the first time - auto cur_iv_and_data_hash = std::make_pair(iv, std::hash{}(page_data)); - if (retry_count != 0) { - if (last_iv_and_data_hash == cur_iv_and_data_hash) { - ++num_identical_reads; - } - // don't retry right away if there are potentially other external writers - std::this_thread::yield(); + + // don't wait on the first retry as we want to optimize the case where the first read + // from the iv table cache didn't validate and we are fetching the iv block from disk for the first time + auto cur_iv_and_data_hash = std::make_pair(iv, std::hash{}(page_data)); + if (retry_count != 0) { + if (last_iv_and_data_hash == cur_iv_and_data_hash) { + ++num_identical_reads; } - last_iv_and_data_hash = cur_iv_and_data_hash; - ++retry_count; + // don't retry right away if there are potentially other external writers + std::this_thread::yield(); } + last_iv_and_data_hash = cur_iv_and_data_hash; + ++retry_count; }; auto should_retry = [&]() -> bool { - // if we don't have an observer object, we're guaranteed to be alone in the world, - // and retrying will not help us, since the file is not being changed. 
- if (!observer) - return false; - // if no-one is mutating the file, retrying will also not help: - if (observer && observer->no_concurrent_writer_seen()) + // if we don't have an observer object or it hasn't seen any other writers, + // we're guaranteed to be alone in the world and retrying will not help us, + // since the file is not being changed. + if (!observer || observer->no_concurrent_writer_seen()) return false; // if we do not observe identical data or iv within several sequential reads then // this is a multiprocess reader starvation scenario so keep trying until we get a match @@ -385,20 +435,17 @@ size_t AESCryptor::read(FileDesc fd, off_t pos, char* dst, size_t size, WriteObs size_t bytes_read = 0; while (bytes_read < size) { - ssize_t actual = check_read(fd, real_offset(pos), m_rw_buffer.get(), block_size); + size_t actual = check_read(fd, data_pos_to_file_pos(pos), m_rw_buffer.get()); if (actual == 0) return bytes_read; - iv_table& iv = get_iv_table(fd, pos, retry_count == 0 ? IVLookupMode::UseCache : IVLookupMode::Refetch); + IVTable& iv = get_iv_table(fd, pos, retry_count == 0 ? IVLookupMode::UseCache : IVLookupMode::Refetch); if (iv.iv1 == 0) { - if (should_retry()) { - retry(std::string_view{m_rw_buffer.get(), block_size}, iv, "iv1 == 0"); - continue; - } // This block has never been written to, so we've just read pre-allocated // space. No memset() since the code using this doesn't rely on - // pre-allocated space being zeroed. + // pre-allocated space being zeroed and it could mask uninitialized + // reads. 
return bytes_read; } @@ -407,7 +454,7 @@ size_t AESCryptor::read(FileDesc fd, off_t pos, char* dst, size_t size, WriteObs // new IV and writing the data if (iv.iv2 == 0) { if (should_retry()) { - retry(std::string_view{m_rw_buffer.get(), block_size}, iv, "iv2 == 0"); + retry(std::string_view{m_rw_buffer.get(), encryption_page_size}, iv, "iv2 == 0"); continue; } // Very first write was interrupted @@ -424,22 +471,18 @@ size_t AESCryptor::read(FileDesc fd, off_t pos, char* dst, size_t size, WriteObs // old hmacs that don't go with this data. ftruncate() is // required to fill any added space with zeroes, so assume that's // what happened if the buffer is all zeroes - ssize_t i; - for (i = 0; i < actual; ++i) { - if (m_rw_buffer[i] != 0) { - break; - } - } - if (i != actual) { - // at least one byte wasn't zero - retry(std::string_view{m_rw_buffer.get(), block_size}, iv, "i != bytes_read"); - continue; - } - return bytes_read; + bool all_zero = std::all_of(&m_rw_buffer[0], &m_rw_buffer[actual], [](char c) { + return c == 0; + }); + if (all_zero) + return bytes_read; + + retry(std::string_view{m_rw_buffer.get(), encryption_page_size}, iv, "i != bytes_read"); + continue; } } - // We may expect some adress ranges of the destination buffer of + // We may expect some address ranges of the destination buffer of // AESCryptor::read() to stay unmodified, i.e. being overwritten with // the same bytes as already present, and may have read-access to these // from other threads while decryption is taking place. @@ -453,30 +496,30 @@ size_t AESCryptor::read(FileDesc fd, off_t pos, char* dst, size_t size, WriteObs // We therefore decrypt to a temporary buffer first and then copy the // completely decrypted data after. 
crypt(mode_Decrypt, pos, m_dst_buffer.get(), m_rw_buffer.get(), reinterpret_cast(&iv.iv1)); - memcpy(dst, m_dst_buffer.get(), block_size); + memcpy(dst, m_dst_buffer.get(), encryption_page_size); - pos += block_size; - dst += block_size; - bytes_read += block_size; + pos += encryption_page_size; + dst += encryption_page_size; + bytes_read += encryption_page_size; retry_count = 0; } return bytes_read; } -void AESCryptor::try_read_block(FileDesc fd, off_t pos, char* dst) noexcept +void AESCryptor::try_read_block(FileDesc fd, SizeType pos, char* dst) noexcept { - ssize_t bytes_read = check_read(fd, real_offset(pos), m_rw_buffer.get(), block_size); + size_t bytes_read = check_read(fd, data_pos_to_file_pos(pos), m_rw_buffer.get()); if (bytes_read == 0) { std::cerr << "Read failed: 0x" << std::hex << pos << std::endl; - memset(dst, 0x55, block_size); + memset(dst, 0x55, encryption_page_size); return; } - iv_table& iv = get_iv_table(fd, pos, IVLookupMode::Refetch); + IVTable& iv = get_iv_table(fd, pos, IVLookupMode::Refetch); if (iv.iv1 == 0) { std::cerr << "Block never written: 0x" << std::hex << pos << std::endl; - memset(dst, 0xAA, block_size); + memset(dst, 0xAA, encryption_page_size); return; } @@ -496,11 +539,11 @@ void AESCryptor::try_read_block(FileDesc fd, off_t pos, char* dst) noexcept crypt(mode_Decrypt, pos, dst, m_rw_buffer.get(), reinterpret_cast(&iv.iv1)); } -void AESCryptor::write(FileDesc fd, off_t pos, const char* src, size_t size, WriteMarker* marker) noexcept +void AESCryptor::write(FileDesc fd, SizeType pos, const char* src, size_t size, WriteMarker* marker) noexcept { - REALM_ASSERT(size % block_size == 0); + REALM_ASSERT(size % encryption_page_size == 0); while (size > 0) { - iv_table& iv = get_iv_table(fd, pos); + IVTable& iv = get_iv_table(fd, pos); memcpy(&iv.iv2, &iv.iv1, 32); // this is also copying the hmac do { @@ -510,7 +553,8 @@ void AESCryptor::write(FileDesc fd, off_t pos, const char* src, size_t size, Wri ++iv.iv1; crypt(mode_Encrypt, 
pos, m_rw_buffer.get(), src, reinterpret_cast(&iv.iv1)); - hmac_sha224(Span(reinterpret_cast(m_rw_buffer.get()), block_size), iv.hmac1, m_hmacKey); + hmac_sha224(Span(reinterpret_cast(m_rw_buffer.get()), encryption_page_size), iv.hmac1, + Span(m_key).sub_span<32>()); // In the extremely unlikely case that both the old and new versions have // the same hash we won't know which IV to use, so bump the IV until // they're different. @@ -518,18 +562,19 @@ void AESCryptor::write(FileDesc fd, off_t pos, const char* src, size_t size, Wri if (marker) marker->mark(pos); - check_write(fd, iv_table_pos(pos), &iv, sizeof(iv)); - check_write(fd, real_offset(pos), m_rw_buffer.get(), block_size); + File::write_static(fd, iv_table_pos(pos), reinterpret_cast(&iv), sizeof(iv)); + File::write_static(fd, data_pos_to_file_pos(pos), m_rw_buffer.get(), encryption_page_size); if (marker) marker->unmark(); + m_iv_buffer_cache[page_index(pos)] = iv; - pos += block_size; - src += block_size; - size -= block_size; + pos += encryption_page_size; + src += encryption_page_size; + size -= encryption_page_size; } } -void AESCryptor::crypt(EncryptionMode mode, off_t pos, char* dst, const char* src, const char* stored_iv) noexcept +void AESCryptor::crypt(EncryptionMode mode, SizeType pos, char* dst, const char* src, const char* stored_iv) noexcept { uint8_t iv[aes_block_size] = {0}; memcpy(iv, stored_iv, 4); @@ -540,31 +585,32 @@ void AESCryptor::crypt(EncryptionMode mode, off_t pos, char* dst, const char* sr CCCryptorReset(cryptor, iv); size_t bytesEncrypted = 0; - CCCryptorStatus err = CCCryptorUpdate(cryptor, src, block_size, dst, block_size, &bytesEncrypted); + CCCryptorStatus err = + CCCryptorUpdate(cryptor, src, encryption_page_size, dst, encryption_page_size, &bytesEncrypted); REALM_ASSERT(err == kCCSuccess); - REALM_ASSERT(bytesEncrypted == block_size); + REALM_ASSERT(bytesEncrypted == encryption_page_size); #elif defined(_WIN32) ULONG cbData; int i; if (mode == mode_Encrypt) { - i = 
BCryptEncrypt(m_aes_key_handle, (PUCHAR)src, block_size, nullptr, (PUCHAR)iv, sizeof(iv), (PUCHAR)dst, - block_size, &cbData, 0); + i = BCryptEncrypt(m_aes_key_handle, (PUCHAR)src, encryption_page_size, nullptr, (PUCHAR)iv, sizeof(iv), + (PUCHAR)dst, encryption_page_size, &cbData, 0); REALM_ASSERT_RELEASE_EX(i == 0 && "BCryptEncrypt()", i); - REALM_ASSERT_RELEASE_EX(cbData == block_size && "BCryptEncrypt()", cbData); + REALM_ASSERT_RELEASE_EX(cbData == encryption_page_size && "BCryptEncrypt()", cbData); } else if (mode == mode_Decrypt) { - i = BCryptDecrypt(m_aes_key_handle, (PUCHAR)src, block_size, nullptr, (PUCHAR)iv, sizeof(iv), (PUCHAR)dst, - block_size, &cbData, 0); + i = BCryptDecrypt(m_aes_key_handle, (PUCHAR)src, encryption_page_size, nullptr, (PUCHAR)iv, sizeof(iv), + (PUCHAR)dst, encryption_page_size, &cbData, 0); REALM_ASSERT_RELEASE_EX(i == 0 && "BCryptDecrypt()", i); - REALM_ASSERT_RELEASE_EX(cbData == block_size && "BCryptDecrypt()", cbData); + REALM_ASSERT_RELEASE_EX(cbData == encryption_page_size && "BCryptDecrypt()", cbData); } else { REALM_UNREACHABLE(); } #else - if (!EVP_CipherInit_ex(m_ctx, EVP_aes_256_cbc(), NULL, m_aesKey.data(), iv, mode)) + if (!EVP_CipherInit_ex(m_ctx, EVP_aes_256_cbc(), NULL, m_key.data(), iv, mode)) handle_error(); int len; @@ -572,7 +618,7 @@ void AESCryptor::crypt(EncryptionMode mode, off_t pos, char* dst, const char* sr EVP_CIPHER_CTX_set_padding(m_ctx, 0); if (!EVP_CipherUpdate(m_ctx, reinterpret_cast(dst), &len, reinterpret_cast(src), - block_size)) + encryption_page_size)) handle_error(); // Finalize the encryption. Should not output further data. 
@@ -581,374 +627,276 @@ void AESCryptor::crypt(EncryptionMode mode, off_t pos, char* dst, const char* sr #endif } -EncryptedFileMapping::EncryptedFileMapping(SharedFileInfo& file, size_t file_offset, void* addr, size_t size, +std::unique_ptr EncryptedFile::add_mapping(SizeType file_offset, void* addr, size_t size, + File::AccessMode access) +{ + auto mapping = std::make_unique(*this, file_offset, addr, size, access); + CheckedLockGuard lock(mutex); + mappings.push_back(mapping.get()); + return mapping; +} + +EncryptedFileMapping::EncryptedFileMapping(EncryptedFile& file, SizeType file_offset, void* addr, size_t size, File::AccessMode access, util::WriteObserver* observer, util::WriteMarker* marker) : m_file(file) - , m_page_shift(log2(realm::util::page_size())) - , m_blocks_per_page(static_cast(1ULL << m_page_shift) / block_size) - , m_num_decrypted(0) , m_access(access) , m_observer(observer) , m_marker(marker) #ifdef REALM_DEBUG - , m_validate_buffer(new char[static_cast(1ULL << m_page_shift)]) + , m_validate_buffer(new char[encryption_page_size]) #endif { - REALM_ASSERT(m_blocks_per_page * block_size == static_cast(1ULL << m_page_shift)); set(addr, size, file_offset); // throws - file.mappings.push_back(this); } EncryptedFileMapping::~EncryptedFileMapping() { + CheckedLockGuard lock(m_file.mutex); for (auto& e : m_page_state) { REALM_ASSERT(is_not(e, Writable)); } if (m_access == File::access_ReadWrite) { - flush(); - sync(); + do_sync(); + } + + // FIXME: might be worth intrusive listing this? 
+ auto it = std::find(m_file.mappings.begin(), m_file.mappings.end(), this); + REALM_ASSERT(it != m_file.mappings.end()); + if (it != m_file.mappings.end()) { + m_file.mappings.erase(it); } - m_file.mappings.erase(remove(m_file.mappings.begin(), m_file.mappings.end(), this)); } -char* EncryptedFileMapping::page_addr(size_t local_page_ndx) const noexcept +// offset within page, not within file +uint16_t EncryptedFileMapping::get_offset_of_address(const void* addr) const noexcept { - REALM_ASSERT_EX(local_page_ndx < m_page_state.size(), local_page_ndx, m_page_state.size()); - return static_cast(m_addr) + (local_page_ndx << m_page_shift); + return reinterpret_cast(addr) & (encryption_page_size - 1); } -void EncryptedFileMapping::mark_outdated(size_t local_page_ndx) noexcept +size_t EncryptedFileMapping::get_local_index_of_address(const void* addr, size_t offset) const noexcept { - if (local_page_ndx >= m_page_state.size()) - return; - REALM_ASSERT(is_not(m_page_state[local_page_ndx], UpToDate)); - REALM_ASSERT(is_not(m_page_state[local_page_ndx], Dirty)); - REALM_ASSERT(is_not(m_page_state[local_page_ndx], Writable)); + REALM_ASSERT_EX(addr >= m_addr, addr, m_addr); + return (reinterpret_cast(addr) - reinterpret_cast(m_addr) + offset) / encryption_page_size; +} + +bool EncryptedFileMapping::contains_page(size_t block_in_file) const noexcept +{ + return block_in_file - m_first_page < m_page_state.size(); +} - size_t chunk_ndx = local_page_ndx >> page_to_chunk_shift; - if (m_chunk_dont_scan[chunk_ndx]) - m_chunk_dont_scan[chunk_ndx] = 0; +char* EncryptedFileMapping::page_addr(size_t local_ndx) const noexcept +{ + REALM_ASSERT_DEBUG(local_ndx < m_page_state.size()); + return static_cast(m_addr) + (local_ndx * encryption_page_size); +} + +SizeType EncryptedFileMapping::page_pos(size_t local_ndx) const noexcept +{ + return SizeType(local_ndx + m_first_page) * encryption_page_size; } -bool EncryptedFileMapping::copy_up_to_date_page(size_t local_page_ndx) noexcept +// If we 
have multiple mappings for the same part of the file, one of them may +// already contain the page we're about to read and if so we can skip reading +// it and instead just memcpy it. +bool EncryptedFileMapping::copy_up_to_date_page(size_t local_ndx) noexcept { - REALM_ASSERT_EX(local_page_ndx < m_page_state.size(), local_page_ndx, m_page_state.size()); + REALM_ASSERT_EX(local_ndx < m_page_state.size(), local_ndx, m_page_state.size()); // Precondition: this method must never be called for a page which // is already up to date. - REALM_ASSERT(is_not(m_page_state[local_page_ndx], UpToDate)); - for (size_t i = 0; i < m_file.mappings.size(); ++i) { - EncryptedFileMapping* m = m_file.mappings[i]; - size_t page_ndx_in_file = local_page_ndx + m_first_page; - if (m == this || !m->contains_page(page_ndx_in_file)) + REALM_ASSERT(is_not(m_page_state[local_ndx], UpToDate)); + size_t ndx_in_file = local_ndx + m_first_page; + for (auto& m : m_file.mappings) { + m->assert_locked(); + if (m == this || !m->contains_page(ndx_in_file)) continue; - size_t shadow_mapping_local_ndx = page_ndx_in_file - m->m_first_page; - if (is(m->m_page_state[shadow_mapping_local_ndx], UpToDate)) { - memcpy(page_addr(local_page_ndx), m->page_addr(shadow_mapping_local_ndx), - static_cast(1ULL << m_page_shift)); - return true; - } + size_t other_mapping_ndx = ndx_in_file - m->m_first_page; + if (is_not(m->m_page_state[other_mapping_ndx], UpToDate)) + continue; + + memcpy(page_addr(local_ndx), m->page_addr(other_mapping_ndx), encryption_page_size); + set(m_page_state[local_ndx], UpToDate); + clear(m_page_state[local_ndx], StaleIV); + return true; } return false; } -void EncryptedFileMapping::refresh_page(size_t local_page_ndx, size_t required) -{ - REALM_ASSERT_EX(local_page_ndx < m_page_state.size(), local_page_ndx, m_page_state.size()); - REALM_ASSERT(is_not(m_page_state[local_page_ndx], Dirty)); - REALM_ASSERT(is_not(m_page_state[local_page_ndx], Writable)); - char* addr = page_addr(local_page_ndx); - 
- if (!copy_up_to_date_page(local_page_ndx)) { - const size_t page_ndx_in_file = local_page_ndx + m_first_page; - const size_t end_page_ndx_in_file = m_first_page + m_page_state.size(); - off_t data_pos = off_t(page_ndx_in_file << m_page_shift); - if (is(m_page_state[local_page_ndx], StaleIV)) { - auto refreshed_ivs = - m_file.cryptor.refresh_ivs(m_file.fd, data_pos, page_ndx_in_file, end_page_ndx_in_file); - for (const auto& [page_ndx, state] : refreshed_ivs) { - size_t local_page_ndx_of_iv_change = page_ndx - m_first_page; - REALM_ASSERT_EX(contains_page(page_ndx), page_ndx, m_first_page, m_page_state.size()); - if (is(m_page_state[local_page_ndx_of_iv_change], Dirty | Writable)) { - continue; - } - switch (state) { - case IVRefreshState::UpToDate: - if (is(m_page_state[local_page_ndx_of_iv_change], StaleIV)) { - set(m_page_state[local_page_ndx_of_iv_change], UpToDate); - clear(m_page_state[local_page_ndx_of_iv_change], StaleIV); - } - break; - case IVRefreshState::RequiresRefresh: - clear(m_page_state[local_page_ndx_of_iv_change], StaleIV); - clear(m_page_state[local_page_ndx_of_iv_change], UpToDate); - break; - } - } - REALM_ASSERT_EX(refreshed_ivs.count(page_ndx_in_file) == 1, page_ndx_in_file, refreshed_ivs.size()); - if (refreshed_ivs[page_ndx_in_file] == IVRefreshState::UpToDate) { - return; - } - } - size_t size = static_cast(1ULL << m_page_shift); - size_t actual = m_file.cryptor.read(m_file.fd, data_pos, addr, size, m_observer); - if (actual < size) { - if (actual >= required) { - memset(addr + actual, 0x55, size - actual); - } - else { - size_t fs = to_size_t(File::get_size_static(m_file.fd)); - throw DecryptionFailed( - util::format("failed to decrypt block %1 in file of size %2", local_page_ndx + m_first_page, fs)); - } +// Whenever we advance our reader view of the file we mark all previously +// up-to-date pages as being possibly stale. 
On the next access of the page we +// then check if the IV for that page has changed to determine if the page has +// actually changed or if we can just mark it as being up-to-date again. +bool EncryptedFileMapping::check_possibly_stale_page(size_t local_ndx) noexcept +{ + if (is_not(m_page_state[local_ndx], StaleIV)) + return false; + size_t ndx_in_file = local_ndx + m_first_page; + bool did_change = m_file.cryptor.refresh_iv(m_file.fd, ndx_in_file); + // Update the page state in all mappings and not just the current one because + // refresh_iv() only returns true once per page per write. Deferring this + // until copy_up_to_date_page() almost works, but this mapping could be + // removed before the other mapping copies the page. + for (auto& m : m_file.mappings) { + m->assert_locked(); + if (!m->contains_page(ndx_in_file)) + continue; + auto& state = m->m_page_state[ndx_in_file - m->m_first_page]; + if (is(state, StaleIV)) { + REALM_ASSERT(is_not(state, UpToDate)); + clear(state, StaleIV); + if (!did_change) + set(state, UpToDate); } } - if (is_not(m_page_state[local_page_ndx], UpToDate)) - m_num_decrypted++; - set(m_page_state[local_page_ndx], UpToDate); - clear(m_page_state[local_page_ndx], StaleIV); + return !did_change; } -void EncryptedFileMapping::mark_pages_for_IV_check() +void EncryptedFileMapping::refresh_page(size_t local_ndx, bool to_modify) { - for (size_t i = 0; i < m_file.mappings.size(); ++i) { - EncryptedFileMapping* m = m_file.mappings[i]; - for (size_t pg = m->get_start_index(); pg < m->get_end_index(); ++pg) { - size_t local_page_ndx = pg - m->m_first_page; - if (is(m->m_page_state[local_page_ndx], UpToDate) && - is_not(m->m_page_state[local_page_ndx], Dirty | Writable)) { - REALM_ASSERT(is_not(m->m_page_state[local_page_ndx], StaleIV)); - clear(m->m_page_state[local_page_ndx], UpToDate); - set(m->m_page_state[local_page_ndx], StaleIV); - } + REALM_ASSERT_EX(local_ndx < m_page_state.size(), local_ndx, m_page_state.size()); + 
REALM_ASSERT(is_not(m_page_state[local_ndx], Dirty)); + REALM_ASSERT(is_not(m_page_state[local_ndx], Writable)); + if (copy_up_to_date_page(local_ndx) || check_possibly_stale_page(local_ndx)) { + return; + } + + char* addr = page_addr(local_ndx); + size_t actual = m_file.cryptor.read(m_file.fd, page_pos(local_ndx), addr, encryption_page_size, m_observer); + if (actual != encryption_page_size && !to_modify) { + size_t fs = to_size_t(File::get_size_static(m_file.fd)); + throw DecryptionFailed( + util::format("failed to decrypt block %1 in file of size %2", local_ndx + m_first_page, fs)); + } + set(m_page_state[local_ndx], UpToDate); +} + +void EncryptedFile::mark_data_as_possibly_stale() +{ + + util::CheckedLockGuard lock(mutex); + cryptor.invalidate_ivs(); + for (auto& m : mappings) { + m->assert_locked(); + m->mark_pages_for_iv_check(); + } +} + +void EncryptedFileMapping::mark_pages_for_iv_check() +{ + for (auto& state : m_page_state) { + if (is(state, UpToDate) && is_not(state, Dirty | Writable)) { + REALM_ASSERT(is_not(state, StaleIV)); + clear(state, UpToDate); + set(state, StaleIV); } } } -void EncryptedFileMapping::write_and_update_all(size_t local_page_ndx, size_t begin_offset, - size_t end_offset) noexcept +void EncryptedFileMapping::write_and_update_all(size_t local_ndx, uint16_t offset, uint16_t size) noexcept { - REALM_ASSERT(is(m_page_state[local_page_ndx], Writable)); - REALM_ASSERT(is(m_page_state[local_page_ndx], UpToDate)); + REALM_ASSERT(is(m_page_state[local_ndx], Writable)); + REALM_ASSERT(is(m_page_state[local_ndx], UpToDate)); + REALM_ASSERT(is_not(m_page_state[local_ndx], StaleIV)); + REALM_ASSERT(offset + size <= encryption_page_size); // Go through all other mappings of this file and copy changes into those mappings - size_t page_ndx_in_file = local_page_ndx + m_first_page; - for (size_t i = 0; i < m_file.mappings.size(); ++i) { - EncryptedFileMapping* m = m_file.mappings[i]; - if (m != this && m->contains_page(page_ndx_in_file)) { - size_t 
shadow_local_page_ndx = page_ndx_in_file - m->m_first_page; - if (is(m->m_page_state[shadow_local_page_ndx], UpToDate) || - is(m->m_page_state[shadow_local_page_ndx], StaleIV)) { // only keep up to data pages up to date - memcpy(m->page_addr(shadow_local_page_ndx) + begin_offset, page_addr(local_page_ndx) + begin_offset, - end_offset - begin_offset); - if (is(m->m_page_state[shadow_local_page_ndx], StaleIV)) { - set(m->m_page_state[shadow_local_page_ndx], UpToDate); - clear(m->m_page_state[shadow_local_page_ndx], StaleIV); - } - } - else { - m->mark_outdated(shadow_local_page_ndx); - } + size_t ndx_in_file = local_ndx + m_first_page; + for (auto& m : m_file.mappings) { + m->assert_locked(); + if (m == this || !m->contains_page(ndx_in_file)) + continue; + + size_t other_local_ndx = ndx_in_file - m->m_first_page; + auto& state = m->m_page_state[other_local_ndx]; + if (is(state, UpToDate) || is(state, StaleIV)) { // only keep up to date pages up to date + memcpy(m->page_addr(other_local_ndx) + offset, page_addr(local_ndx) + offset, size); + set(state, UpToDate); + clear(state, StaleIV); } } - set(m_page_state[local_page_ndx], Dirty); - clear(m_page_state[local_page_ndx], Writable); - clear(m_page_state[local_page_ndx], StaleIV); - size_t chunk_ndx = local_page_ndx >> page_to_chunk_shift; - if (m_chunk_dont_scan[chunk_ndx]) - m_chunk_dont_scan[chunk_ndx] = 0; + set(m_page_state[local_ndx], Dirty); + clear(m_page_state[local_ndx], Writable); } -void EncryptedFileMapping::validate_page(size_t local_page_ndx) noexcept +void EncryptedFileMapping::validate_page(size_t local_ndx) noexcept { #ifdef REALM_DEBUG - REALM_ASSERT(local_page_ndx < m_page_state.size()); - if (is_not(m_page_state[local_page_ndx], UpToDate)) + REALM_ASSERT(local_ndx < m_page_state.size()); + if (is_not(m_page_state[local_ndx], UpToDate)) return; - const size_t page_ndx_in_file = local_page_ndx + m_first_page; - if (!m_file.cryptor.read(m_file.fd, off_t(page_ndx_in_file << m_page_shift), 
m_validate_buffer.get(), - static_cast(1ULL << m_page_shift), m_observer)) + if (!m_file.cryptor.read(m_file.fd, page_pos(local_ndx), m_validate_buffer.get(), encryption_page_size, + m_observer)) return; - for (size_t i = 0; i < m_file.mappings.size(); ++i) { - EncryptedFileMapping* m = m_file.mappings[i]; - size_t shadow_mapping_local_ndx = page_ndx_in_file - m->m_first_page; - if (m != this && m->contains_page(page_ndx_in_file) && is(m->m_page_state[shadow_mapping_local_ndx], Dirty)) { - memcpy(m_validate_buffer.get(), m->page_addr(shadow_mapping_local_ndx), - static_cast(1ULL << m_page_shift)); + const size_t ndx_in_file = local_ndx + m_first_page; + for (auto& m : m_file.mappings) { + m->assert_locked(); + size_t other_local_ndx = ndx_in_file - m->m_first_page; + if (m != this && m->contains_page(ndx_in_file) && is(m->m_page_state[other_local_ndx], Dirty)) { + memcpy(m_validate_buffer.get(), m->page_addr(other_local_ndx), encryption_page_size); break; } } - if (memcmp(m_validate_buffer.get(), page_addr(local_page_ndx), static_cast(1ULL << m_page_shift))) { - std::cerr << "mismatch " << this << ": fd(" << m_file.fd << ")" - << "page(" << local_page_ndx << "/" << m_page_state.size() << ") " << m_validate_buffer.get() << " " - << page_addr(local_page_ndx) << std::endl; + if (memcmp(m_validate_buffer.get(), page_addr(local_ndx), encryption_page_size) != 0) { + util::format(std::cerr, "mismatch %1: fd(%2) page(%3/%4) %5 %6\n", this, m_file.fd, local_ndx, + m_page_state.size(), m_validate_buffer.get(), page_addr(local_ndx)); REALM_TERMINATE(""); } #else - static_cast(local_page_ndx); + static_cast(local_ndx); #endif } void EncryptedFileMapping::validate() noexcept { #ifdef REALM_DEBUG - const size_t num_local_pages = m_page_state.size(); - for (size_t local_page_ndx = 0; local_page_ndx < num_local_pages; ++local_page_ndx) - validate_page(local_page_ndx); + for (size_t i = 0; i < m_page_state.size(); ++i) + validate_page(i); #endif } -void 
EncryptedFileMapping::reclaim_page(size_t page_ndx) +void EncryptedFileMapping::do_flush() noexcept { -#ifdef _WIN32 - // On windows we don't know how to replace a page within a page range with a fresh one. - // instead we clear it. If the system runs with same-page-merging, this will reduce - // the number of used pages. - memset(page_addr(page_ndx), 0, static_cast(1) << m_page_shift); -#else - // On Posix compatible, we can request a new page in the middle of an already - // requested range, so that's what we do. This releases the backing store for the - // old page and gives us a shared zero-page that we can later demand-allocate, thus - // reducing the overall amount of used physical pages. - void* addr = page_addr(page_ndx); - void* addr2 = ::mmap(addr, 1 << m_page_shift, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); - if (addr != addr2) { - if (addr2 == 0) { - int err = errno; - throw SystemError(err, get_errno_msg("using mmap() to clear page failed", err)); + for (size_t i = 0; i < m_page_state.size(); ++i) { + if (is_not(m_page_state[i], Dirty)) { + validate_page(i); + continue; } - throw std::runtime_error("internal error in mmap()"); + m_file.cryptor.write(m_file.fd, page_pos(i), page_addr(i), encryption_page_size, m_marker); + clear(m_page_state[i], Dirty); } -#endif -} - -/* This functions is a bit convoluted. It reclaims pages, but only does a limited amount of work - * each time it's called. It saves the progress in a 'progress_ptr' so that it can resume later - * from where it was stopped. 
- * - * The workload is composed of workunits, each unit signifying - * 1) A scanning of the state of 4K pages - * 2) One system call (to mmap to release a page and get a new one) - * 3) A scanning of 1K entries in the "don't scan" array (corresponding to 4M pages) - * Approximately - */ -void EncryptedFileMapping::reclaim_untouched(size_t& progress_index, size_t& work_limit) noexcept -{ - const auto scan_amount_per_workunit = 4096; - bool contiguous_scan = false; - size_t next_scan_payment = scan_amount_per_workunit; - const size_t last_index = get_end_index(); - - auto done_some_work = [&]() { - if (work_limit > 0) - work_limit--; - }; - auto visit_and_potentially_reclaim = [&](size_t page_ndx) { - PageState& ps = m_page_state[page_ndx]; - if (is(ps, UpToDate)) { - if (is_not(ps, Touched) && is_not(ps, Dirty) && is_not(ps, Writable)) { - clear(ps, UpToDate); - reclaim_page(page_ndx); - m_num_decrypted--; - done_some_work(); - } - contiguous_scan = false; - } - clear(ps, Touched); - }; - - auto skip_chunk_if_possible = [&](size_t& page_ndx) // update vars corresponding to skipping a chunk if possible - { - size_t chunk_ndx = page_ndx >> page_to_chunk_shift; - if (m_chunk_dont_scan[chunk_ndx]) { - // skip to end of chunk - page_ndx = ((chunk_ndx + 1) << page_to_chunk_shift) - 1; - progress_index = m_first_page + page_ndx; - // postpone next scan payment - next_scan_payment += page_to_chunk_factor; - return true; - } - else - return false; - }; - - auto is_last_page_in_chunk = [](size_t page_ndx) { - auto page_to_chunk_mask = page_to_chunk_factor - 1; - return (page_ndx & page_to_chunk_mask) == page_to_chunk_mask; - }; - auto is_first_page_in_chunk = [](size_t page_ndx) { - auto page_to_chunk_mask = page_to_chunk_factor - 1; - return (page_ndx & page_to_chunk_mask) == 0; - }; - - while (work_limit > 0 && progress_index < last_index) { - size_t page_ndx = progress_index - m_first_page; - if (!skip_chunk_if_possible(page_ndx)) { - if (is_first_page_in_chunk(page_ndx)) 
{ - contiguous_scan = true; - } - visit_and_potentially_reclaim(page_ndx); - // if we've scanned a full chunk contiguously, mark it as not needing scans - if (is_last_page_in_chunk(page_ndx)) { - if (contiguous_scan) { - m_chunk_dont_scan[page_ndx >> page_to_chunk_shift] = 1; - } - contiguous_scan = false; - } - } - // account for work performed: - if (page_ndx >= next_scan_payment) { - next_scan_payment = page_ndx + scan_amount_per_workunit; - done_some_work(); - } - ++progress_index; - } - return; + validate(); } void EncryptedFileMapping::flush() noexcept { - const size_t num_dirty_pages = m_page_state.size(); - for (size_t local_page_ndx = 0; local_page_ndx < num_dirty_pages; ++local_page_ndx) { - if (is_not(m_page_state[local_page_ndx], Dirty)) { - validate_page(local_page_ndx); - continue; - } - - size_t page_ndx_in_file = local_page_ndx + m_first_page; - m_file.cryptor.write(m_file.fd, off_t(page_ndx_in_file << m_page_shift), page_addr(local_page_ndx), - static_cast(1ULL << m_page_shift), m_marker); - clear(m_page_state[local_page_ndx], Dirty); - } + util::CheckedLockGuard lock(m_file.mutex); + do_flush(); +} - validate(); +void EncryptedFileMapping::sync() noexcept +{ + util::CheckedLockGuard lock(m_file.mutex); + do_sync(); } #ifdef _MSC_VER #pragma warning(disable : 4297) // throw in noexcept #endif -void EncryptedFileMapping::sync() noexcept +void EncryptedFileMapping::do_sync() noexcept { + do_flush(); + #ifdef _WIN32 if (FlushFileBuffers(m_file.fd)) return; throw std::system_error(GetLastError(), std::system_category(), "FlushFileBuffers() failed"); #else fsync(m_file.fd); - // FIXME: on iOS/OSX fsync may not be enough to ensure crash safety. - // Consider adding fcntl(F_FULLFSYNC). This most likely also applies to msync. 
- // - // See description of fsync on iOS here: - // https://developer.apple.com/library/ios/documentation/System/Conceptual/ManPages_iPhoneOS/man2/fsync.2.html - // - // See also - // https://developer.apple.com/library/ios/documentation/Cocoa/Conceptual/CoreData/Articles/cdPersistentStores.html - // for a discussion of this related to core data. #endif } #ifdef _MSC_VER @@ -957,134 +905,79 @@ void EncryptedFileMapping::sync() noexcept void EncryptedFileMapping::write_barrier(const void* addr, size_t size) noexcept { - // Propagate changes to all other decrypted pages mapping the same memory - + CheckedLockGuard lock(m_file.mutex); + REALM_ASSERT(size > 0); REALM_ASSERT(m_access == File::access_ReadWrite); - size_t first_accessed_local_page = get_local_index_of_address(addr); - size_t first_offset = static_cast(addr) - page_addr(first_accessed_local_page); - const char* last_accessed_address = static_cast(addr) + (size == 0 ? 0 : size - 1); - size_t last_accessed_local_page = get_local_index_of_address(last_accessed_address); - size_t pages_size = m_page_state.size(); - - // propagate changes to first page (update may be partial, may also be to last page) - if (first_accessed_local_page < pages_size) { - REALM_ASSERT_EX(is(m_page_state[first_accessed_local_page], UpToDate), - m_page_state[first_accessed_local_page]); - if (first_accessed_local_page == last_accessed_local_page) { - size_t last_offset = last_accessed_address - page_addr(first_accessed_local_page); - write_and_update_all(first_accessed_local_page, first_offset, last_offset + 1); - } - else - write_and_update_all(first_accessed_local_page, first_offset, static_cast(1) << m_page_shift); - } - // propagate changes to pages between first and last page (update only full pages) - for (size_t idx = first_accessed_local_page + 1; idx < last_accessed_local_page && idx < pages_size; ++idx) { - REALM_ASSERT(is(m_page_state[idx], UpToDate)); - write_and_update_all(idx, 0, static_cast(1) << m_page_shift); - } - 
// propagate changes to the last page (update may be partial) - if (first_accessed_local_page < last_accessed_local_page && last_accessed_local_page < pages_size) { - REALM_ASSERT(is(m_page_state[last_accessed_local_page], UpToDate)); - size_t last_offset = last_accessed_address - page_addr(last_accessed_local_page); - write_and_update_all(last_accessed_local_page, 0, last_offset + 1); - } -} - -void EncryptedFileMapping::read_barrier(const void* addr, size_t size, Header_to_size header_to_size, bool to_modify) -{ - size_t first_accessed_local_page = get_local_index_of_address(addr); - size_t page_size = 1ULL << m_page_shift; - size_t required = get_offset_of_address(addr) + size; - { - // make sure the first page is available - PageState& ps = m_page_state[first_accessed_local_page]; - if (is_not(ps, Touched)) - set(ps, Touched); - if (is_not(ps, UpToDate)) - refresh_page(first_accessed_local_page, to_modify ? 0 : required); - if (to_modify) - set(ps, Writable); - } - // force the page reclaimer to look into pages in this chunk: - size_t chunk_ndx = first_accessed_local_page >> page_to_chunk_shift; - if (m_chunk_dont_scan[chunk_ndx]) - m_chunk_dont_scan[chunk_ndx] = 0; + size_t local_ndx = get_local_index_of_address(addr); + auto offset_in_page = uint16_t(static_cast(addr) - page_addr(local_ndx)); + size += offset_in_page; - if (header_to_size) { - // We know it's an array, and array headers are 8-byte aligned, so it is - // included in the first page which was handled above. 
- size = header_to_size(static_cast(addr)); - required = get_offset_of_address(addr) + size; + // Propagate changes to all other decrypted pages mapping the same memory + while (size > 0) { + REALM_ASSERT(local_ndx < m_page_state.size()); + REALM_ASSERT(is(m_page_state[local_ndx], PageState::Writable)); + auto bytes_in_page = uint16_t(std::min(encryption_page_size, size) - offset_in_page); + write_and_update_all(local_ndx, offset_in_page, bytes_in_page); + size -= offset_in_page + bytes_in_page; + offset_in_page = 0; + ++local_ndx; } +} - size_t last_idx = get_local_index_of_address(addr, size == 0 ? 0 : size - 1); - size_t pages_size = m_page_state.size(); - - // We already checked first_accessed_local_page above, so we start the loop - // at first_accessed_local_page + 1 to check the following page. - for (size_t idx = first_accessed_local_page + 1; idx <= last_idx && idx < pages_size; ++idx) { - required -= page_size; - // force the page reclaimer to look into pages in this chunk - chunk_ndx = idx >> page_to_chunk_shift; - if (m_chunk_dont_scan[chunk_ndx]) - m_chunk_dont_scan[chunk_ndx] = 0; - - PageState& ps = m_page_state[idx]; - if (is_not(ps, Touched)) - set(ps, Touched); +void EncryptedFileMapping::read_barrier(const void* addr, size_t size, bool to_modify) +{ + CheckedLockGuard lock(m_file.mutex); + REALM_ASSERT(size > 0); + size_t begin = get_local_index_of_address(addr); + size_t end = get_local_index_of_address(addr, size - 1); + for (size_t local_ndx = begin; local_ndx <= end; ++local_ndx) { + PageState& ps = m_page_state[local_ndx]; if (is_not(ps, UpToDate)) - refresh_page(idx, to_modify ? 
0 : required); + refresh_page(local_ndx, to_modify); if (to_modify) set(ps, Writable); } } -void EncryptedFileMapping::extend_to(size_t offset, size_t new_size) +void EncryptedFileMapping::extend_to(SizeType offset, size_t new_size) { - REALM_ASSERT_EX(new_size % page_size() == 0, new_size, page_size()); - size_t num_pages = new_size >> m_page_shift; - m_page_state.resize(num_pages, PageState::Clean); - m_chunk_dont_scan.resize((num_pages + page_to_chunk_factor - 1) >> page_to_chunk_shift, false); - m_file.cryptor.set_file_size((off_t)(offset + new_size)); + CheckedLockGuard lock(m_file.mutex); + REALM_ASSERT_EX(new_size % encryption_page_size == 0, new_size, encryption_page_size); + m_page_state.resize(page_count(new_size), PageState::Clean); + m_file.cryptor.set_data_size(offset + SizeType(new_size)); } -void EncryptedFileMapping::set(void* new_addr, size_t new_size, size_t new_file_offset) +void EncryptedFileMapping::set(void* new_addr, size_t new_size, SizeType new_file_offset) { - REALM_ASSERT(new_file_offset % (1ULL << m_page_shift) == 0); - REALM_ASSERT(new_size % (1ULL << m_page_shift) == 0); + CheckedLockGuard lock(m_file.mutex); + REALM_ASSERT(new_file_offset % encryption_page_size == 0); + REALM_ASSERT(new_size % encryption_page_size == 0); // This seems dangerous - correct operation in a setting with multiple (partial) // mappings of the same file would rely on ordering of individual mapping requests. // Currently we only ever extend the file - but when we implement continuous defrag, // this design should be revisited. 
- m_file.cryptor.set_file_size(off_t(new_size + new_file_offset)); + m_file.cryptor.set_data_size(new_file_offset + SizeType(new_size)); - flush(); + do_flush(); m_addr = new_addr; - m_first_page = new_file_offset >> m_page_shift; - size_t num_pages = new_size >> m_page_shift; - - m_num_decrypted = 0; + m_first_page = new_file_offset / encryption_page_size; m_page_state.clear(); - m_chunk_dont_scan.clear(); - - m_page_state.resize(num_pages, PageState(0)); - m_chunk_dont_scan.resize((num_pages + page_to_chunk_factor - 1) >> page_to_chunk_shift, false); + m_page_state.resize(new_size / encryption_page_size, PageState::Clean); } -File::SizeType encrypted_size_to_data_size(File::SizeType size) noexcept +SizeType encrypted_size_to_data_size(SizeType size) noexcept { - if (size == 0) - return 0; - return fake_offset(size); + return size == 0 ? 0 : file_pos_to_data_pos(size); } -File::SizeType data_size_to_encrypted_size(File::SizeType size) noexcept +SizeType data_size_to_encrypted_size(SizeType size) noexcept { - size_t ps = page_size(); - return real_offset((size + ps - 1) & ~(ps - 1)); + SizeType r = size % encryption_page_size; + size += r ? 
encryption_page_size - r : 0; + return data_pos_to_file_pos(size); } } // namespace realm::util #else diff --git a/src/realm/util/encrypted_file_mapping.hpp b/src/realm/util/encrypted_file_mapping.hpp index 54b056e70a8..990d68a6f9b 100644 --- a/src/realm/util/encrypted_file_mapping.hpp +++ b/src/realm/util/encrypted_file_mapping.hpp @@ -19,190 +19,169 @@ #ifndef REALM_UTIL_ENCRYPTED_FILE_MAPPING_HPP #define REALM_UTIL_ENCRYPTED_FILE_MAPPING_HPP -#include -#include #include +#include +#include + +#include + +namespace realm::util { #if REALM_ENABLE_ENCRYPTION -typedef size_t (*Header_to_size)(const char* addr); +class EncryptedFileMapping; -#include +class EncryptedFile { +public: + EncryptedFile(const char* key); -namespace realm::util { + void set_fd(FileDesc fd) + { + this->fd = fd; + } + + std::unique_ptr add_mapping(File::SizeType file_offset, void* addr, size_t size, + File::AccessMode access) REQUIRES(!mutex); -struct SharedFileInfo; + const char* get_key() const noexcept REQUIRES(!mutex) + { + // It's safe to return a pointer into cryptor outside the lock because + // the key doesn't actually change and doesn't need to be guarded by + // the mutex at all. 
+ util::CheckedLockGuard lock(mutex); + return cryptor.get_key(); + } + + void mark_data_as_possibly_stale() REQUIRES(!mutex); + +private: + friend class EncryptedFileMapping; + + CheckedMutex mutex; + FileDesc fd; + AESCryptor cryptor GUARDED_BY(mutex); + std::vector mappings GUARDED_BY(mutex); +}; class EncryptedFileMapping { public: - // Adds the newly-created object to file.mappings iff it's successfully constructed - EncryptedFileMapping(SharedFileInfo& file, size_t file_offset, void* addr, size_t size, File::AccessMode access, - util::WriteObserver* observer = nullptr, util::WriteMarker* marker = nullptr); + EncryptedFileMapping(EncryptedFile& file, File::SizeType file_offset, void* addr, size_t size, + File::AccessMode access, util::WriteObserver* observer = nullptr, + util::WriteMarker* marker = nullptr); ~EncryptedFileMapping(); // Default implementations of copy/assign can trigger multiple destructions EncryptedFileMapping(const EncryptedFileMapping&) = delete; EncryptedFileMapping& operator=(const EncryptedFileMapping&) = delete; - // Encrypt all dirty pages, push them to shared cache and mark them read-only + // Encrypt all dirty blocks, push them to shared cache and mark them read-only // Does not call fsync - void flush() noexcept; + void flush() noexcept REQUIRES(!m_file.mutex); - // Sync the image of this file in shared cache to disk. Does not imply flush. - void sync() noexcept; + // Flush and then sync the image of this file in shared cache to disk. + void sync() noexcept REQUIRES(!m_file.mutex); // Make sure that memory in the specified range is synchronized with any // changes made globally visible through call to write_barrier or refresh_outdated_pages(). 
// Optionally mark the pages for later modification - void read_barrier(const void* addr, size_t size, Header_to_size header_to_size, bool to_modify); + void read_barrier(const void* addr, size_t size, bool to_modify) REQUIRES(!m_file.mutex); // Ensures that any changes made to memory in the specified range // becomes visible to any later calls to read_barrier() // Pages selected must have been marked for modification at an earlier read barrier - void write_barrier(const void* addr, size_t size) noexcept; - - // Mark pages for later checks of the ivs on disk. If the IVs have changed compared to - // the in memory versions the page will later need to be refreshed. - // This is the process by which a reader in a multiprocess scenario detects if its - // mapping should be refreshed while advancing versions. - // The pages marked for IV-checks will be refetched and re-decrypted by later calls to read_barrier. - void mark_pages_for_IV_check(); + void write_barrier(const void* addr, size_t size) noexcept REQUIRES(!m_file.mutex); // Set this mapping to a new address and size // Flushes any remaining dirty pages from the old mapping - void set(void* new_addr, size_t new_size, size_t new_file_offset); + void set(void* new_addr, size_t new_size, File::SizeType new_file_offset) REQUIRES(!m_file.mutex); // Extend the size of this mapping. Memory holding decrypted pages must // have been allocated earlier - void extend_to(size_t offset, size_t new_size); - - size_t collect_decryption_count() - { - return m_num_decrypted; - } - // reclaim any untouched pages - this is thread safe with respect to - // concurrent access/touching of pages - but must be called with the mutex locked. 
- void reclaim_untouched(size_t& progress_ptr, size_t& accumulated_savings) noexcept; + void extend_to(File::SizeType offset, size_t new_size) REQUIRES(!m_file.mutex); - bool contains_page(size_t page_in_file) const; - size_t get_local_index_of_address(const void* addr, size_t offset = 0) const; - size_t get_offset_of_address(const void* addr) const; + bool contains_page(size_t block_in_file) const noexcept REQUIRES(m_file.mutex); + size_t get_local_index_of_address(const void* addr, size_t offset = 0) const noexcept REQUIRES(m_file.mutex); + uint16_t get_offset_of_address(const void* addr) const noexcept REQUIRES(m_file.mutex); - size_t get_end_index() - { - return m_first_page + m_page_state.size(); - } - size_t get_start_index() - { - return m_first_page; - } - void set_marker(WriteMarker* marker) + void set_marker(WriteMarker* marker) noexcept { m_marker = marker; } - void set_observer(WriteObserver* observer) + void set_observer(WriteObserver* observer) noexcept { m_observer = observer; } -#if REALM_DEBUG - std::string print_debug(); -#endif // REALM_DEBUG + std::string print_debug() REQUIRES(!m_file.mutex); private: - SharedFileInfo& m_file; - - size_t m_page_shift; - size_t m_blocks_per_page; + friend class EncryptedFile; - void* m_addr = nullptr; + EncryptedFile& m_file; + void* m_addr GUARDED_BY(m_file.mutex) = nullptr; + size_t m_first_page GUARDED_BY(m_file.mutex); - size_t m_first_page; - size_t m_num_decrypted; // 1 for every page decrypted - - enum PageState { + enum PageState : uint8_t { Clean = 0, - Touched = 1, // a ref->ptr translation has taken place - UpToDate = 2, // the page is fully up to date - StaleIV = 4, // the page needs to check the on disk IV for changes by other processes - Writable = 8, // the page is open for writing - Dirty = 16 // the page has been modified with respect to what's on file. 
+ UpToDate = 1, // the page is fully up to date + StaleIV = 2, // the page needs to check the on disk IV for changes by other processes + Writable = 4, // the page is open for writing + Dirty = 8 // the page has been modified with respect to what's on file. }; - std::vector m_page_state; + std::vector m_page_state GUARDED_BY(m_file.mutex); // little helpers: - inline void clear(PageState& ps, int p) + static constexpr void clear(PageState& ps, int p) { ps = PageState(ps & ~p); } - inline bool is_not(PageState& ps, int p) + static constexpr bool is_not(PageState& ps, int p) { return (ps & p) == 0; } - inline bool is(PageState& ps, int p) + static constexpr bool is(PageState& ps, int p) { return (ps & p) != 0; } - inline void set(PageState& ps, int p) + static constexpr void set(PageState& ps, int p) { ps = PageState(ps | p); } - // 1K pages form a chunk - this array allows us to skip entire chunks during scanning - std::vector m_chunk_dont_scan; - static constexpr int page_to_chunk_shift = 10; - static constexpr size_t page_to_chunk_factor = size_t(1) << page_to_chunk_shift; - File::AccessMode m_access; + const File::AccessMode m_access; util::WriteObserver* m_observer = nullptr; util::WriteMarker* m_marker = nullptr; #ifdef REALM_DEBUG - std::unique_ptr m_validate_buffer; + std::unique_ptr m_validate_buffer GUARDED_BY(m_file.mutex); #endif - char* page_addr(size_t local_page_ndx) const noexcept; - - void mark_outdated(size_t local_page_ndx) noexcept; - bool copy_up_to_date_page(size_t local_page_ndx) noexcept; - void refresh_page(size_t local_page_ndx, size_t required); - void write_and_update_all(size_t local_page_ndx, size_t begin_offset, size_t end_offset) noexcept; - void reclaim_page(size_t page_ndx); - void validate_page(size_t local_page_ndx) noexcept; - void validate() noexcept; -}; + char* page_addr(size_t local_ndx) const noexcept REQUIRES(m_file.mutex); + File::SizeType page_pos(size_t local_ndx) const noexcept REQUIRES(m_file.mutex); + bool 
copy_up_to_date_page(size_t local_ndx) noexcept REQUIRES(m_file.mutex); + bool check_possibly_stale_page(size_t local_ndx) noexcept REQUIRES(m_file.mutex); + void refresh_page(size_t local_ndx, bool to_modify) REQUIRES(m_file.mutex); + void write_and_update_all(size_t local_ndx, uint16_t offset, uint16_t size) noexcept REQUIRES(m_file.mutex); + void validate_page(size_t local_ndx) noexcept REQUIRES(m_file.mutex); + void validate() noexcept REQUIRES(m_file.mutex); + void do_flush() noexcept REQUIRES(m_file.mutex); + void do_sync() noexcept REQUIRES(m_file.mutex); -inline size_t EncryptedFileMapping::get_offset_of_address(const void* addr) const -{ - REALM_ASSERT_3(reinterpret_cast(addr), >=, reinterpret_cast(m_addr)); - return (reinterpret_cast(addr) - reinterpret_cast(m_addr)) & ((1ULL << m_page_shift) - 1); -} - -inline size_t EncryptedFileMapping::get_local_index_of_address(const void* addr, size_t offset) const -{ - REALM_ASSERT_EX(addr >= m_addr, size_t(addr), size_t(m_addr)); - - size_t local_ndx = - ((reinterpret_cast(addr) - reinterpret_cast(m_addr) + offset) >> m_page_shift); - REALM_ASSERT_EX(local_ndx < m_page_state.size(), local_ndx, m_page_state.size(), size_t(addr), size_t(m_addr), - m_page_shift); - return local_ndx; -} + // Mark pages for later checks of the ivs on disk. If the IVs have changed compared to + // the in memory versions the page will later need to be refreshed. + // This is the process by which a reader in a multiprocess scenario detects if its + // mapping should be refreshed while advancing versions. + // The pages marked for IV-checks will be refetched and re-decrypted by later calls to read_barrier. 
+ void mark_pages_for_iv_check() REQUIRES(m_file.mutex); -inline bool EncryptedFileMapping::contains_page(size_t page_in_file) const -{ - // first check for (page_in_file >= m_first_page) so that the following - // subtraction using unsigned types never wraps under 0 - return page_in_file >= m_first_page && page_in_file - m_first_page < m_page_state.size(); -} + void assert_locked() noexcept ASSERT_CAPABILITY(m_file.mutex) {} +}; -#if REALM_DEBUG inline std::string EncryptedFileMapping::print_debug() { +#if REALM_DEBUG auto state_name = [](const PageState& s) -> std::string { if (s == PageState::Clean) { return "Clean"; } std::string state = "{"; - if (s & PageState::Touched) { - state += "Touched"; - } if (s & PageState::UpToDate) { state += "UpToDate"; } @@ -218,6 +197,8 @@ inline std::string EncryptedFileMapping::print_debug() state += "}"; return state; }; + + util::CheckedLockGuard lock(m_file.mutex); std::string page_states; for (PageState& s : m_page_state) { if (!page_states.empty()) { @@ -225,18 +206,20 @@ inline std::string EncryptedFileMapping::print_debug() } page_states += state_name(s); } - return util::format("%1 pages from %2 to %3: %4", m_page_state.size(), m_first_page, + return util::format("%1 blocks from %2 to %3: %4", m_page_state.size(), m_first_page, m_page_state.size() + m_first_page, page_states); -} +#else + return ""; #endif // REALM_DEBUG +} -constexpr inline size_t c_min_encrypted_file_size = 8192; - -} // namespace realm::util +constexpr inline File::SizeType c_min_encrypted_file_size = 8192; +#else // REALM_ENABLE_ENCRYPTION +class EncryptedFile {}; +class EncryptedFileMapping {}; #endif // REALM_ENABLE_ENCRYPTION -namespace realm::util { /// Thrown by EncryptedFileMapping if a file opened is non-empty and does not /// contain valid encrypted data struct DecryptionFailed : FileAccessError { diff --git a/src/realm/util/file.cpp b/src/realm/util/file.cpp index 6a604db8a8e..ea0e5dffd23 100644 --- a/src/realm/util/file.cpp +++ 
b/src/realm/util/file.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -49,6 +50,12 @@ using namespace realm::util; +#ifndef _WIN32 +// All mainstream platforms other than Windows migrated to 64-bit off_t many +// years ago. Supporting 32-bit off_t is possible, but not currently implemented. +static_assert(sizeof(off_t) == 8 || sizeof(size_t) == 4); +#endif + namespace { constexpr size_t c_min_supported_page_size = 4096; size_t get_page_size() @@ -67,7 +74,6 @@ size_t get_page_size() } // This variable exists such that page_size() can return the page size without having to make any system calls. -// It could also have been a static local variable, but Valgrind/Helgrind gives a false error on that. std::atomic cached_page_size = get_page_size(); bool for_each_helper(const std::string& path, const std::string& dir, realm::util::File::ForEachHandler& handler) @@ -426,11 +432,64 @@ OnlyForTestingPageSizeChange::~OnlyForTestingPageSizeChange() cached_page_size = get_page_size(); } -void File::open_internal(const std::string& path, AccessMode a, CreateMode c, int flags, bool* success) +File::File() = default; +File::File(std::string_view path, Mode m) +{ + open(path, m); +} + +File::~File() noexcept +{ + close(); +} + +File::File(File&& f) noexcept +{ +#ifdef _WIN32 + m_fd = f.m_fd; + f.m_fd = nullptr; +#else + m_fd = f.m_fd; +#ifdef REALM_FILELOCK_EMULATION + m_pipe_fd = f.m_pipe_fd; + m_has_exclusive_lock = f.m_has_exclusive_lock; + f.m_has_exclusive_lock = false; + f.m_pipe_fd = -1; +#endif + f.m_fd = -1; +#endif + m_have_lock = f.m_have_lock; + f.m_have_lock = false; + m_encryption = std::move(f.m_encryption); +} + +File& File::operator=(File&& f) noexcept +{ + close(); +#ifdef _WIN32 + m_fd = f.m_fd; + f.m_fd = nullptr; +#else + m_fd = f.m_fd; + f.m_fd = -1; +#ifdef REALM_FILELOCK_EMULATION + m_pipe_fd = f.m_pipe_fd; + f.m_pipe_fd = -1; + m_has_exclusive_lock = f.m_has_exclusive_lock; + f.m_has_exclusive_lock = false; +#endif +#endif + 
m_have_lock = f.m_have_lock; + f.m_have_lock = false; + m_encryption = std::move(f.m_encryption); + return *this; +} + + +void File::open_internal(std::string_view path, AccessMode a, CreateMode c, int flags, bool* success) { REALM_ASSERT_RELEASE(!is_attached()); m_path = path; // for error reporting and debugging - m_cached_unique_id = {}; #ifdef _WIN32 // Windows version @@ -464,7 +523,7 @@ void File::open_internal(const std::string& path, AccessMode a, CreateMode c, in break; } DWORD flags_and_attributes = 0; - HANDLE handle = CreateFile2(u8path(path).c_str(), desired_access, share_mode, creation_disposition, nullptr); + HANDLE handle = CreateFile2(u8path(m_path).c_str(), desired_access, share_mode, creation_disposition, nullptr); if (handle != INVALID_HANDLE_VALUE) { m_fd = handle; m_have_lock = false; @@ -521,7 +580,7 @@ void File::open_internal(const std::string& path, AccessMode a, CreateMode c, in flags2 |= O_TRUNC; if (flags & flag_Append) flags2 |= O_APPEND; - int fd = ::open(path.c_str(), flags2, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + int fd = ::open(m_path.c_str(), flags2, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (0 <= fd) { m_fd = fd; m_have_lock = false; @@ -551,7 +610,7 @@ void File::open_internal(const std::string& path, AccessMode a, CreateMode c, in msg = util::format("Failed to open file at path '%1': parent directory does not exist", path); throw FileAccessError(ErrorCodes::FileNotFound, msg, path, err); case EEXIST: - throw Exists(msg, path); + throw Exists(msg, m_path); case ENOTDIR: msg = format("Failed to open file at path '%1': parent path is not a directory", path); [[fallthrough]]; @@ -565,6 +624,7 @@ void File::open_internal(const std::string& path, AccessMode a, CreateMode c, in void File::close() noexcept { + // FIXME: destroy m_encryption? 
#ifdef _WIN32 // Windows version if (!m_fd) @@ -615,7 +675,7 @@ void File::close_static(FileDesc fd) #endif } -size_t File::read_static(FileDesc fd, char* data, size_t size) +size_t File::read_static(FileDesc fd, SizeType pos, char* data, size_t size) { #ifdef _WIN32 // Windows version char* const data_0 = data; @@ -624,13 +684,17 @@ size_t File::read_static(FileDesc fd, char* data, size_t size) if (int_less_than(size, n)) n = static_cast(size); DWORD r = 0; - if (!ReadFile(fd, data, n, &r, 0)) + OVERLAPPED o{}; + o.Offset = static_cast(pos); + o.OffsetHigh = static_cast(pos >> 32); + if (!ReadFile(fd, data, n, &r, &o)) goto error; if (r == 0) break; REALM_ASSERT_RELEASE(r <= n); size -= size_t(r); data += size_t(r); + pos += r; } return data - data_0; @@ -644,7 +708,7 @@ size_t File::read_static(FileDesc fd, char* data, size_t size) while (0 < size) { // POSIX requires that 'n' is less than or equal to SSIZE_MAX size_t n = std::min(size, size_t(SSIZE_MAX)); - ssize_t r = ::read(fd, data, n); + ssize_t r = pread(fd, data, n, pos); if (r == 0) break; if (r < 0) @@ -652,6 +716,7 @@ size_t File::read_static(FileDesc fd, char* data, size_t size) REALM_ASSERT_RELEASE(size_t(r) <= n); size -= size_t(r); data += size_t(r); + pos += r; } return data - data_0; @@ -663,26 +728,21 @@ size_t File::read_static(FileDesc fd, char* data, size_t size) } -size_t File::read(char* data, size_t size) +size_t File::read(SizeType pos, char* data, size_t size) { REALM_ASSERT_RELEASE(is_attached()); - if (m_encryption_key) { - uint64_t pos_original = File::get_file_pos(m_fd); - REALM_ASSERT(!int_cast_has_overflow(pos_original)); - size_t pos = size_t(pos_original); - Map read_map(*this, access_ReadOnly, static_cast(pos + size)); - realm::util::encryption_read_barrier(read_map, pos, size); - memcpy(data, read_map.get_addr() + pos, size); - uint64_t cur = File::get_file_pos(m_fd); - seek_static(m_fd, cur + size); - return read_map.get_size() - pos; + if (m_encryption) { + Map read_map(*this, 
pos, access_ReadOnly, size); + util::encryption_read_barrier(read_map, 0, size); + memcpy(data, read_map.get_addr(), size); + return size; } - return read_static(m_fd, data, size); + return read_static(m_fd, pos, data, size); } -void File::write_static(FileDesc fd, const char* data, size_t size) +void File::write_static(FileDesc fd, SizeType pos, const char* data, size_t size) { #ifdef _WIN32 while (0 < size) { @@ -690,11 +750,15 @@ void File::write_static(FileDesc fd, const char* data, size_t size) if (int_less_than(size, n)) n = static_cast(size); DWORD r = 0; - if (!WriteFile(fd, data, n, &r, 0)) + OVERLAPPED o{}; + o.Offset = static_cast(pos); + o.OffsetHigh = static_cast(pos >> 32); + if (!WriteFile(fd, data, n, &r, &o)) goto error; REALM_ASSERT_RELEASE(r == n); // Partial writes are not possible. size -= size_t(r); data += size_t(r); + pos += r; } return; @@ -709,13 +773,14 @@ void File::write_static(FileDesc fd, const char* data, size_t size) while (0 < size) { // POSIX requires that 'n' is less than or equal to SSIZE_MAX size_t n = std::min(size, size_t(SSIZE_MAX)); - ssize_t r = ::write(fd, data, n); + ssize_t r = pwrite(fd, data, n, pos); if (r < 0) goto error; // LCOV_EXCL_LINE REALM_ASSERT_RELEASE(r != 0); REALM_ASSERT_RELEASE(size_t(r) <= n); size -= size_t(r); data += size_t(r); + pos += off_t(r); } return; @@ -732,44 +797,39 @@ void File::write_static(FileDesc fd, const char* data, size_t size) #endif } -void File::write(const char* data, size_t size) +void File::write(SizeType pos, const char* data, size_t size) { REALM_ASSERT_RELEASE(is_attached()); - if (m_encryption_key) { - uint64_t pos_original = get_file_pos(m_fd); - REALM_ASSERT(!int_cast_has_overflow(pos_original)); - size_t pos = size_t(pos_original); - Map write_map(*this, access_ReadWrite, static_cast(pos + size)); - realm::util::encryption_read_barrier(write_map, pos, size); - memcpy(write_map.get_addr() + pos, data, size); - realm::util::encryption_write_barrier(write_map, pos, size); - 
uint64_t cur = get_file_pos(m_fd); - seek(cur + size); + if (m_encryption) { + Map write_map(*this, pos, access_ReadWrite, size); + util::encryption_read_barrier(write_map, 0, size); + memcpy(write_map.get_addr(), data, size); + realm::util::encryption_write_barrier(write_map, 0, size); return; } - write_static(m_fd, data, size); + write_static(m_fd, pos, data, size); } -uint64_t File::get_file_pos(FileDesc fd) +File::SizeType File::get_file_pos() { #ifdef _WIN32 LONG high_dword = 0; LARGE_INTEGER li; LARGE_INTEGER res; li.QuadPart = 0; - bool ok = SetFilePointerEx(fd, li, &res, FILE_CURRENT); + bool ok = SetFilePointerEx(m_fd, li, &res, FILE_CURRENT); if (!ok) throw SystemError(GetLastError(), "SetFilePointer() failed"); - return uint64_t(res.QuadPart); + return SizeType(res.QuadPart); #else - auto pos = lseek(fd, 0, SEEK_CUR); + auto pos = lseek(m_fd, 0, SEEK_CUR); if (pos < 0) { throw SystemError(errno, "lseek() failed"); } - return uint64_t(pos); + return SizeType(pos); #endif } @@ -812,12 +872,10 @@ File::SizeType File::get_size() const REALM_ASSERT_RELEASE(is_attached()); File::SizeType size = get_size_static(m_fd); - if (m_encryption_key) { - File::SizeType ret_size = encrypted_size_to_data_size(size); - return ret_size; + if (m_encryption) { + return encrypted_size_to_data_size(size); } - else - return size; + return size; } @@ -825,26 +883,18 @@ void File::resize(SizeType size) { REALM_ASSERT_RELEASE(is_attached()); -#ifdef _WIN32 // Windows version - - // Save file position - SizeType p = get_file_pos(m_fd); - - if (m_encryption_key) + if (m_encryption) size = data_size_to_encrypted_size(size); - // Windows docs say "it is not an error to set the file pointer to a position beyond the end of the file." - // so seeking with SetFilePointerEx() will not error out even if there is no disk space left. - // In this scenario though, the following call to SedEndOfFile() will fail if there is no disk space left. 
- seek(size); - - if (!SetEndOfFile(m_fd)) { +#ifdef _WIN32 // Windows version + FILE_END_OF_FILE_INFO info; + info.EndOfFile.QuadPart = size; + if (!SetFileInformationByHandle(m_fd, FileEndOfFileInfo, &info, sizeof(info))) { DWORD err = GetLastError(); // Eliminate any risk of clobbering if (err == ERROR_HANDLE_DISK_FULL || err == ERROR_DISK_FULL) { - std::string msg = get_last_error_msg("SetEndOfFile() failed: ", err); - throw OutOfDiskSpace(msg); + throw OutOfDiskSpace(get_last_error_msg("SetFileInformationByHandle() failed: ", err)); } - throw SystemError(int(err), "SetEndOfFile() failed"); + throw SystemError(int(err), "SetFileInformationByHandle() failed"); } // Restore file position @@ -852,9 +902,6 @@ void File::resize(SizeType size) #else // POSIX version - if (m_encryption_key) - size = data_size_to_encrypted_size(size); - off_t size2; if (int_cast_with_overflow_detect(size, size2)) throw RuntimeError(ErrorCodes::RangeError, "File size overflow"); @@ -874,62 +921,37 @@ void File::resize(SizeType size) } -void File::prealloc(size_t size) +void File::prealloc(SizeType size) { REALM_ASSERT_RELEASE(is_attached()); - - if (size <= to_size_t(get_size())) { + if (size <= get_size()) { return; } - size_t new_size = size; - if (m_encryption_key) { - new_size = static_cast(data_size_to_encrypted_size(size)); - REALM_ASSERT(size == static_cast(encrypted_size_to_data_size(new_size))); - if (new_size < size) { - throw RuntimeError(ErrorCodes::RangeError, "File size overflow: data_size_to_encrypted_size(" + - realm::util::to_string(size) + - ") == " + realm::util::to_string(new_size)); - } + SizeType new_size = size; + if (m_encryption) { + new_size = data_size_to_encrypted_size(size); + REALM_ASSERT(size == encrypted_size_to_data_size(new_size)); } auto manually_consume_space = [&]() { constexpr size_t chunk_size = 4096; - int64_t original_size = get_size_static(m_fd); // raw size - seek(original_size); - size_t num_bytes = size_t(new_size - original_size); + SizeType 
original_size = get_size_static(m_fd); // raw size + SizeType num_bytes = new_size - original_size; std::string zeros(chunk_size, '\0'); while (num_bytes > 0) { - size_t t = num_bytes > chunk_size ? chunk_size : num_bytes; - write_static(m_fd, zeros.c_str(), t); + size_t t = std::min(num_bytes, chunk_size); + write_static(m_fd, new_size - num_bytes, zeros.c_str(), t); num_bytes -= t; } }; - auto consume_space_interlocked = [&] { -#if REALM_ENABLE_ENCRYPTION - if (m_encryption_key) { - // We need to prevent concurrent calls to lseek from the encryption layer - // while we're writing to the file to extend it. Otherwise an intervening - // lseek may redirect the writing process, causing file corruption. - UniqueLock lck(util::mapping_mutex); - manually_consume_space(); - } - else { - manually_consume_space(); - } -#else - manually_consume_space(); -#endif - }; - #if REALM_HAVE_POSIX_FALLOCATE // Mostly Linux only if (!prealloc_if_supported(0, new_size)) { - consume_space_interlocked(); + manually_consume_space(); } -#else // Non-atomic fallback -#if REALM_PLATFORM_APPLE +#elif REALM_PLATFORM_APPLE // Non-atomic fallback // posix_fallocate() is not supported on MacOS or iOS, so use a combination of fcntl(F_PREALLOCATE) and // ftruncate().
@@ -939,22 +961,21 @@ void File::prealloc(size_t size) throw SystemError(err, "fstat() inside prealloc() failed"); } - size_t allocated_size; + SizeType allocated_size; if (int_cast_with_overflow_detect(statbuf.st_blocks, allocated_size)) { throw RuntimeError(ErrorCodes::RangeError, - "Overflow on block conversion to size_t " + realm::util::to_string(statbuf.st_blocks)); + util::format("Overflow on block conversion to SizeType %1", statbuf.st_blocks)); } if (int_multiply_with_overflow_detect(allocated_size, S_BLKSIZE)) { - throw RuntimeError(ErrorCodes::RangeError, "Overflow computing existing file space allocation blocks: " + - realm::util::to_string(allocated_size) + - " block size: " + realm::util::to_string(S_BLKSIZE)); + throw RuntimeError(ErrorCodes::RangeError, + util::format("Overflow computing existing file space allocation blocks: %1 block size %2", + allocated_size, S_BLKSIZE)); } // Only attempt to preallocate space if there's not already sufficient free space in the file. // APFS would fail with EINVAL if we attempted it, and HFS+ would preallocate extra space unnecessarily. // See for details. if (new_size > allocated_size) { - off_t to_allocate = static_cast(new_size - statbuf.st_size); fstore_t store = {F_ALLOCATEALL, F_PEOFPOSMODE, 0, to_allocate, 0}; int ret = 0; @@ -973,7 +994,7 @@ void File::prealloc(size_t size) // 2) fcntl will fail with ENOTSUP on non-supported file systems such as ExFAT. In this case // the fallback should succeed. 
// 3) if there is some other error such as no space left (ENOSPC) we will expect to fail again later - consume_space_interlocked(); + manually_consume_space(); } } @@ -990,18 +1011,14 @@ void File::prealloc(size_t size) throw SystemError(err, "ftruncate() inside prealloc() failed"); } #elif REALM_ANDROID || defined(_WIN32) || defined(__EMSCRIPTEN__) - - consume_space_interlocked(); - + manually_consume_space(); #else #error Please check if/how your OS supports file preallocation -#endif - #endif // REALM_HAVE_POSIX_FALLOCATE } -bool File::prealloc_if_supported(SizeType offset, size_t size) +bool File::prealloc_if_supported(SizeType offset, SizeType size) { REALM_ASSERT_RELEASE(is_attached()); @@ -1356,93 +1373,6 @@ void File::rw_unlock() noexcept #endif // REALM_FILELOCK_EMULATION } -void* File::map(AccessMode a, size_t size, int /*map_flags*/, size_t offset) const -{ - return realm::util::mmap({m_fd, m_path, a, m_encryption_key.get()}, size, offset); -} - -void* File::map_fixed(AccessMode a, void* address, size_t size, int /* map_flags */, size_t offset) const -{ - if (m_encryption_key.get()) { - // encryption enabled - this is not supported - see explanation in alloc_slab.cpp - REALM_ASSERT(false); - } -#ifdef _WIN32 - // windows, no encryption - this is not supported, see explanation in alloc_slab.cpp, - // above the method 'update_reader_view()' - REALM_ASSERT(false); - return nullptr; -#else - // unencrypted - mmap part of already reserved space - return realm::util::mmap_fixed(m_fd, address, size, a, offset, m_encryption_key.get()); -#endif -} - -void* File::map_reserve(AccessMode a, size_t size, size_t offset) const -{ - static_cast(a); // FIXME: Consider removing this argument - return realm::util::mmap_reserve(m_fd, size, offset); -} - -#if REALM_ENABLE_ENCRYPTION -void* File::map(AccessMode a, size_t size, EncryptedFileMapping*& mapping, int /*map_flags*/, size_t offset) const -{ - return realm::util::mmap({m_fd, m_path, a, m_encryption_key.get()}, size, 
offset, mapping); -} - -void* File::map_fixed(AccessMode a, void* address, size_t size, EncryptedFileMapping* mapping, int /* map_flags */, - size_t offset) const -{ - if (m_encryption_key.get()) { - // encryption enabled - we shouldn't be here, all memory was allocated by reserve - REALM_ASSERT_RELEASE(false); - } -#ifndef _WIN32 - // no encryption. On Unixes, map relevant part of reserved virtual address range - return realm::util::mmap_fixed(m_fd, address, size, a, offset, nullptr, mapping); -#else - // no encryption - unsupported on windows - REALM_ASSERT(false); - return nullptr; -#endif -} - -void* File::map_reserve(AccessMode a, size_t size, size_t offset, EncryptedFileMapping*& mapping) const -{ - if (m_encryption_key.get()) { - // encrypted file - just mmap it, the encryption layer handles if the mapping extends beyond eof - return realm::util::mmap({m_fd, m_path, a, m_encryption_key.get()}, size, offset, mapping); - } -#ifndef _WIN32 - // not encrypted, do a proper reservation on Unixes' - return realm::util::mmap_reserve({m_fd, m_path, a, nullptr}, size, offset, mapping); -#else - // on windows, this is a no-op - return nullptr; -#endif -} - -#endif // REALM_ENABLE_ENCRYPTION - -void File::unmap(void* addr, size_t size) noexcept -{ - realm::util::munmap(addr, size); -} - - -void* File::remap(void* old_addr, size_t old_size, AccessMode a, size_t new_size, int /*map_flags*/, - size_t file_offset) const -{ - return realm::util::mremap({m_fd, m_path, a, m_encryption_key.get()}, file_offset, old_addr, old_size, new_size); -} - - -void File::sync_map(FileDesc fd, void* addr, size_t size) -{ - realm::util::msync(fd, addr, size); -} - - bool File::exists(const std::string& path) { #if REALM_HAVE_STD_FILESYSTEM @@ -1587,10 +1517,12 @@ bool File::copy(const std::string& origin_path, const std::string& target_path, } size_t buffer_size = 4096; + off_t pos = 0; auto buffer = std::make_unique(buffer_size); // Throws for (;;) { - size_t n = 
origin_file.read(buffer.get(), buffer_size); // Throws - target_file.write(buffer.get(), n); // Throws + size_t n = origin_file.read(pos, buffer.get(), buffer_size); // Throws + target_file.write(pos, buffer.get(), n); // Throws + pos += n; if (n < buffer_size) break; } @@ -1600,26 +1532,6 @@ bool File::copy(const std::string& origin_path, const std::string& target_path, } -bool File::compare(const std::string& path_1, const std::string& path_2) -{ - File file_1{path_1}; // Throws - File file_2{path_2}; // Throws - size_t buffer_size = 4096; - std::unique_ptr buffer_1 = std::make_unique(buffer_size); // Throws - std::unique_ptr buffer_2 = std::make_unique(buffer_size); // Throws - for (;;) { - size_t n_1 = file_1.read(buffer_1.get(), buffer_size); // Throws - size_t n_2 = file_2.read(buffer_2.get(), buffer_size); // Throws - if (n_1 != n_2) - return false; - if (!std::equal(buffer_1.get(), buffer_1.get() + n_1, buffer_2.get())) - return false; - if (n_1 < buffer_size) - break; - } - return true; -} - bool File::is_same_file_static(FileDesc f1, FileDesc f2, const std::string& path1, const std::string& path2) { return get_unique_id(f1, path1) == get_unique_id(f2, path2); @@ -1649,23 +1561,6 @@ FileDesc File::dup_file_desc(FileDesc fd) return fd_duped; } -File::UniqueID File::get_unique_id() -{ - REALM_ASSERT_RELEASE(is_attached()); - File::UniqueID uid = File::get_unique_id(m_fd, m_path); - if (!m_cached_unique_id) { - m_cached_unique_id = std::make_optional(uid); - } - if (m_cached_unique_id != uid) { - throw FileAccessError(ErrorCodes::FileOperationFailed, - util::format("The unique id of this Realm file has changed unexpectedly, this could be " - "due to modifications by an external process '%1'", - m_path), - m_path); - } - return uid; -} - FileDesc File::get_descriptor() const { return m_fd; @@ -1815,12 +1710,11 @@ void File::set_encryption_key(const char* key) { #if REALM_ENABLE_ENCRYPTION if (key) { - auto buffer = std::make_unique(64); - memcpy(buffer.get(), 
key, 64); - m_encryption_key = std::move(buffer); + m_encryption = std::make_unique(key); + m_encryption->set_fd(m_fd); } else { - m_encryption_key.reset(); + m_encryption.reset(); } #else if (key) { @@ -1829,22 +1723,32 @@ void File::set_encryption_key(const char* key) #endif } -const char* File::get_encryption_key() const +EncryptedFile* File::get_encryption() const noexcept { - return m_encryption_key.get(); +#if REALM_ENABLE_ENCRYPTION + return m_encryption.get(); +#else + return nullptr; +#endif +} + +File::MapBase::MapBase() noexcept = default; +File::MapBase::~MapBase() noexcept +{ + unmap(); } -void File::MapBase::map(const File& f, AccessMode a, size_t size, int map_flags, size_t offset, - util::WriteObserver* observer) +void File::MapBase::map(const File& f, AccessMode a, size_t size, SizeType offset, util::WriteObserver* observer) { REALM_ASSERT(!m_addr); #if REALM_ENABLE_ENCRYPTION - m_addr = f.map(a, size, m_encrypted_mapping, map_flags, offset); + m_addr = mmap({f.m_fd, a, f.m_encryption.get()}, size, offset, m_encrypted_mapping); if (observer && m_encrypted_mapping) { m_encrypted_mapping->set_observer(observer); } #else - m_addr = f.map(a, size, map_flags, offset); + std::unique_ptr dummy_encrypted_mapping; + m_addr = mmap({f.m_fd, a, nullptr}, size, offset, dummy_encrypted_mapping); static_cast(observer); #endif m_size = m_reservation_size = size; @@ -1860,25 +1764,15 @@ void File::MapBase::unmap() noexcept return; REALM_ASSERT(m_reservation_size); #if REALM_ENABLE_ENCRYPTION - if (m_encrypted_mapping) { - m_encrypted_mapping = nullptr; - util::remove_encrypted_mapping(m_addr, m_size); - } + m_encrypted_mapping = nullptr; #endif - ::munmap(m_addr, m_reservation_size); + munmap(m_addr, m_reservation_size); m_addr = nullptr; m_size = 0; m_reservation_size = 0; } -void File::MapBase::remap(const File& f, AccessMode a, size_t size, int map_flags) -{ - REALM_ASSERT(m_addr); - m_addr = f.remap(m_addr, m_size, a, size, map_flags); - m_size = 
m_reservation_size = size; -} - -bool File::MapBase::try_reserve(const File& file, AccessMode a, size_t size, size_t offset, +bool File::MapBase::try_reserve(const File& file, AccessMode a, size_t size, SizeType offset, util::WriteObserver* observer) { #ifdef _WIN32 @@ -1896,9 +1790,8 @@ bool File::MapBase::try_reserve(const File& file, AccessMode a, size_t size, siz m_fd = file.get_descriptor(); m_offset = offset; #if REALM_ENABLE_ENCRYPTION - if (file.m_encryption_key) { - m_encrypted_mapping = - util::reserve_mapping(addr, {m_fd, file.get_path(), a, file.m_encryption_key.get()}, offset); + if (file.m_encryption) { + m_encrypted_mapping = util::reserve_mapping(addr, {m_fd, a, file.m_encryption.get()}, offset); if (observer) { m_encrypted_mapping->set_observer(observer); } @@ -1915,7 +1808,6 @@ bool File::MapBase::try_extend_to(size_t size) noexcept if (size > m_reservation_size) { return false; } - // return false; #ifndef _WIN32 char* extension_start_addr = (char*)m_addr + m_size; size_t extension_size = size - m_size; @@ -1927,14 +1819,14 @@ bool File::MapBase::try_extend_to(size_t size) noexcept if (got_addr == MAP_FAILED) return false; REALM_ASSERT(got_addr == extension_start_addr); - util::extend_encrypted_mapping(m_encrypted_mapping, m_addr, m_offset, m_size, size); + m_encrypted_mapping->extend_to(m_offset, size); m_size = size; return true; } #endif try { - void* got_addr = util::mmap_fixed(m_fd, extension_start_addr, extension_size, m_access_mode, - extension_start_offset, nullptr); + void* got_addr = + util::mmap_fixed(m_fd, extension_start_addr, extension_size, m_access_mode, extension_start_offset); if (got_addr == extension_start_addr) { m_size = size; return true; @@ -1950,8 +1842,14 @@ bool File::MapBase::try_extend_to(size_t size) noexcept void File::MapBase::sync() { REALM_ASSERT(m_addr); +#if REALM_ENABLE_ENCRYPTION + if (m_encrypted_mapping) { + m_encrypted_mapping->sync(); + return; + } +#endif - File::sync_map(m_fd, m_addr, m_size); + 
realm::util::msync(m_fd, m_addr, m_size); } void File::MapBase::flush() @@ -1959,7 +1857,7 @@ void File::MapBase::flush() REALM_ASSERT(m_addr); #if REALM_ENABLE_ENCRYPTION if (m_encrypted_mapping) { - realm::util::encryption_flush(m_encrypted_mapping); + m_encrypted_mapping->flush(); } #endif } diff --git a/src/realm/util/file.hpp b/src/realm/util/file.hpp index 6fc910280ca..21aaa250556 100644 --- a/src/realm/util/file.hpp +++ b/src/realm/util/file.hpp @@ -54,6 +54,7 @@ namespace realm::util { +class EncryptedFile; class EncryptedFileMapping; class WriteObserver; @@ -149,13 +150,13 @@ class File { mode_Append ///< access_ReadWrite, create_Auto, flag_Append (fopen: ab+) }; - /// Equivalent to calling open(const std::string&, Mode) on a + /// Equivalent to calling open(std::string_view, Mode) on a /// default constructed instance. - explicit File(const std::string& path, Mode = mode_Read); + explicit File(std::string_view path, Mode = mode_Read); /// Create an instance that is not initially attached to an open /// file. - File() = default; + File(); ~File() noexcept; File(File&&) noexcept; @@ -174,7 +175,7 @@ class File { /// derived from AccessError, the derived exception type is thrown /// (as long as the underlying system provides the information to /// unambiguously distinguish that particular reason). - void open(const std::string& path, Mode = mode_Read); + void open(std::string_view path, Mode = mode_Read); /// This function is idempotent, that is, it is valid to call it /// regardless of whether this instance currently is attached to @@ -202,18 +203,21 @@ class File { flag_Append = 2 ///< Move to end of file before each write. }; - /// See open(const std::string&, Mode). + /// See open(std::string_view, Mode). /// /// Specifying access_ReadOnly together with a create mode that is /// not create_Never, or together with a non-zero \a flags /// argument, results in undefined behavior. 
Specifying flag_Trunc /// together with create_Must results in undefined behavior. - void open(const std::string& path, AccessMode, CreateMode, int flags); + void open(std::string_view path, AccessMode, CreateMode, int flags); /// Same as open(path, access_ReadWrite, create_Auto, 0), except /// that this one returns an indication of whether a new file was /// created, or an existing file was opened. - void open(const std::string& path, bool& was_created); + void open(std::string_view path, bool& was_created); + + /// Plays the same role as off_t in POSIX + typedef int_fast64_t SizeType; /// Read data into the specified buffer and return the number of /// bytes read. If the returned number of bytes is less than \a @@ -221,8 +225,8 @@ class File { /// /// Calling this function on an instance, that is not currently /// attached to an open file, has undefined behavior. - size_t read(char* data, size_t size); - static size_t read_static(FileDesc fd, char* data, size_t size); + size_t read(SizeType pos, char* data, size_t size); + static size_t read_static(FileDesc fd, SizeType pos, char* data, size_t size); /// Write the specified data to this file. /// @@ -231,35 +235,32 @@ class File { /// /// Calling this function on an instance, that was opened in /// read-only mode, has undefined behavior. - void write(const char* data, size_t size); - static void write_static(FileDesc fd, const char* data, size_t size); + void write(SizeType pos, const char* data, size_t size); + static void write_static(FileDesc fd, SizeType pos, const char* data, size_t size); // Tells current file pointer of fd - static uint64_t get_file_pos(FileDesc fd); + SizeType get_file_pos(); /// Calls write(s.data(), s.size()). - void write(const std::string& s) + void write(SizeType pos, std::string_view s) { - write(s.data(), s.size()); + write(pos, s.data(), s.size()); } /// Calls read(data, N). 
template - size_t read(char (&data)[N]) + size_t read(SizeType pos, char (&data)[N]) { - return read(data, N); + return read(pos, data, N); } /// Calls write(data(), N). template - void write(const char (&data)[N]) + void write(SizeType pos, const char (&data)[N]) { - write(data, N); + write(pos, data, N); } - /// Plays the same role as off_t in POSIX - typedef int_fast64_t SizeType; - /// Calling this function on an instance that is not attached to /// an open file has undefined behavior. SizeType get_size() const; @@ -287,7 +288,7 @@ class File { /// through distinct File instances. /// /// \sa prealloc_if_supported() - void prealloc(size_t new_size); + void prealloc(SizeType new_size); /// When supported by the system, allocate space on the target /// device for the specified region of the file. If the region @@ -308,7 +309,7 @@ class File { /// /// \sa prealloc() /// \sa is_prealloc_supported() - bool prealloc_if_supported(SizeType offset, size_t size); + bool prealloc_if_supported(SizeType offset, SizeType size); /// See prealloc_if_supported(). static bool is_prealloc_supported(); @@ -394,22 +395,11 @@ class File { /// \param key A 64-byte encryption key, or null to disable encryption. void set_encryption_key(const char* key); - /// Get the encryption key set by set_encryption_key(), - /// null_ptr if no key set. - const char* get_encryption_key() const; + EncryptedFile* get_encryption() const noexcept; /// Set the path used for emulating file locks. If not set explicitly, /// the emulation will use the path of the file itself suffixed by ".fifo" void set_fifo_path(const std::string& fifo_dir_path, const std::string& fifo_file_name); - enum { - /// If possible, disable opportunistic flushing of dirted - /// pages of a memory mapped file to physical medium. On some - /// systems this cannot be disabled. On other systems it is - /// the default behavior. 
An explicit call to sync_map() will - /// flush the buffers regardless of whether this flag is - /// specified or not. - map_NoSync = 1 - }; /// Map this file into memory. The file is mapped as shared /// memory. This allows two processes to interact under exatly the @@ -431,39 +421,6 @@ class File { /// /// Calling this function with a size that is greater than the /// size of the file has undefined behavior. - void* map(AccessMode, size_t size, int map_flags = 0, size_t offset = 0) const; - void* map_fixed(AccessMode, void* address, size_t size, int map_flags = 0, size_t offset = 0) const; - void* map_reserve(AccessMode, size_t size, size_t offset) const; - /// The same as unmap(old_addr, old_size) followed by map(a, - /// new_size, map_flags), but more efficient on some systems. - /// - /// The old address range must have been acquired by a call to - /// map() or remap() on this File instance, the specified access - /// mode and flags must be the same as the ones specified - /// previously, and this File instance must not have been reopend - /// in the meantime. Failing to adhere to these rules will result - /// in undefined behavior. - /// - /// If this function throws, the old address range will remain - /// mapped. - void* remap(void* old_addr, size_t old_size, AccessMode a, size_t new_size, int map_flags = 0, - size_t file_offset = 0) const; - -#if REALM_ENABLE_ENCRYPTION - void* map(AccessMode, size_t size, EncryptedFileMapping*& mapping, int map_flags = 0, size_t offset = 0) const; - void* map_fixed(AccessMode, void* address, size_t size, EncryptedFileMapping* mapping, int map_flags = 0, - size_t offset = 0) const; - void* map_reserve(AccessMode, size_t size, size_t offset, EncryptedFileMapping*& mapping) const; -#endif - /// Unmap the specified address range which must have been - /// previously returned by map(). - static void unmap(void* addr, size_t size) noexcept; - - /// Flush in-kernel buffers to disk. 
This blocks the caller until - /// the synchronization operation is complete. The specified - /// address range must be (a subset of) one that was previously returned by - /// map(). - static void sync_map(FileDesc fd, void* addr, size_t size); /// Check whether the specified file or directory exists. Note /// that a file or directory that resides in a directory that the @@ -522,10 +479,6 @@ class File { /// Copy the file at the specified origin path to the specified target path. static bool copy(const std::string& origin_path, const std::string& target_path, bool overwrite_existing = true); - /// Compare the two files at the specified paths for equality. Returns true - /// if, and only if they are equal. - static bool compare(const std::string& path_1, const std::string& path_2); - /// Check whether two open file descriptors refer to the same /// underlying file, that is, if writing via one of them, will /// affect what is read from the other. In UNIX this boils down to @@ -609,9 +562,6 @@ class File { uint_fast64_t inode; #endif }; - // Return the unique id for the current opened file descriptor. - // Same UniqueID means they are the same file. 
- UniqueID get_unique_id(); // Throws // Return the file descriptor for the file FileDesc get_descriptor() const; // Return the path of the open file, or an empty string if @@ -648,13 +598,12 @@ class File { std::string m_fifo_path; #endif #endif - std::unique_ptr m_encryption_key = nullptr; + std::unique_ptr m_encryption; std::string m_path; - std::optional m_cached_unique_id; bool lock(bool exclusive, bool non_blocking); bool rw_lock(bool exclusive, bool non_blocking); - void open_internal(const std::string& path, AccessMode, CreateMode, int flags, bool* success); + void open_internal(std::string_view path, AccessMode, CreateMode, int flags, bool* success); #ifdef REALM_FILELOCK_EMULATION bool has_shared_lock() const noexcept @@ -667,11 +616,11 @@ class File { void* m_addr = nullptr; mutable size_t m_size = 0; size_t m_reservation_size = 0; - size_t m_offset = 0; + uint64_t m_offset = 0; FileDesc m_fd; AccessMode m_access_mode = access_ReadOnly; - MapBase() noexcept = default; + MapBase() noexcept; ~MapBase() noexcept; // Disable copying. Copying an opened MapBase will create a scenario @@ -679,14 +628,11 @@ class File { MapBase(const MapBase&) = delete; MapBase& operator=(const MapBase&) = delete; - // Use - void map(const File&, AccessMode, size_t size, int map_flags, size_t offset = 0, - util::WriteObserver* observer = nullptr); + void map(const File&, AccessMode, size_t size, SizeType offset = 0, util::WriteObserver* observer = nullptr); // reserve address space for later mapping operations. // returns false if reservation can't be done. - bool try_reserve(const File&, AccessMode, size_t size, size_t offset = 0, + bool try_reserve(const File&, AccessMode, size_t size, SizeType offset = 0, util::WriteObserver* observer = nullptr); - void remap(const File&, AccessMode, size_t size, int map_flags); void unmap() noexcept; // fully update any process shared representation (e.g. buffer cache). 
// other processes will be able to see changes, but a full platform crash @@ -699,13 +645,13 @@ class File { // crash will *not* have lost data. void sync(); #if REALM_ENABLE_ENCRYPTION - mutable util::EncryptedFileMapping* m_encrypted_mapping = nullptr; - inline util::EncryptedFileMapping* get_encrypted_mapping() const + mutable std::unique_ptr m_encrypted_mapping; + util::EncryptedFileMapping* get_encrypted_mapping() const { - return m_encrypted_mapping; + return m_encrypted_mapping.get(); } #else - inline util::EncryptedFileMapping* get_encrypted_mapping() const + util::EncryptedFileMapping* get_encrypted_mapping() const { return nullptr; } @@ -713,7 +659,6 @@ class File { }; }; - /// This class provides a RAII abstraction over the concept of a /// memory mapped file. /// @@ -733,15 +678,15 @@ template class File::Map : private MapBase { public: /// Equivalent to calling map() on a default constructed instance. - explicit Map(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T), int map_flags = 0, + explicit Map(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T), util::WriteObserver* observer = nullptr); - explicit Map(const File&, size_t offset, AccessMode = access_ReadOnly, size_t size = sizeof(T), int map_flags = 0, + explicit Map(const File&, SizeType offset, AccessMode = access_ReadOnly, size_t size = sizeof(T), util::WriteObserver* observer = nullptr); /// Create an instance that is not initially attached to a memory /// mapped file. - Map() noexcept; + Map() noexcept = default; // Disable copying. Copying an opened Map will create a scenario // where the same memory will be mapped once but unmapped twice. 
@@ -764,8 +709,7 @@ class File::Map : private MapBase { other.m_addr = nullptr; other.m_size = other.m_reservation_size = 0; #if REALM_ENABLE_ENCRYPTION - m_encrypted_mapping = other.m_encrypted_mapping; - other.m_encrypted_mapping = nullptr; + m_encrypted_mapping = std::move(other.m_encrypted_mapping); #endif return *this; } @@ -780,7 +724,7 @@ class File::Map : private MapBase { /// attached to a memory mapped file has undefined behavior. The /// returned pointer is the same as what will subsequently be /// returned by get_addr(). - T* map(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T), int map_flags = 0, size_t offset = 0, + T* map(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T), SizeType offset = 0, util::WriteObserver* observer = nullptr); /// See File::unmap(). This function is idempotent, that is, it is @@ -788,16 +732,21 @@ class File::Map : private MapBase { /// currently attached to a memory mapped file. void unmap() noexcept; - bool try_reserve(const File&, AccessMode a = access_ReadOnly, size_t size = sizeof(T), size_t offset = 0, + bool try_reserve(const File&, AccessMode a = access_ReadOnly, size_t size = sizeof(T), SizeType offset = 0, util::WriteObserver* observer = nullptr); - /// See File::remap(). + /// The same as unmap(old_addr, old_size) followed by map(a, + /// new_size, map_flags), but more efficient on some systems. + /// /// /// Calling this function on a Map instance that is not currently attached /// to a memory mapped file is equivalent to calling map(). The returned /// pointer is the same as what will subsequently be returned by /// get_addr(). - T* remap(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T), int map_flags = 0); + /// + /// If this function throws, the old address range will remain + /// mapped. 
+ T* remap(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T)); /// Try to extend the existing mapping to a given size bool try_extend_to(size_t size) noexcept; @@ -806,9 +755,9 @@ class File::Map : private MapBase { /// /// Calling this function on an instance that is not currently /// attached to a memory mapped file, has undefined behavior. - void sync(); + using MapBase::flush; + using MapBase::sync; - void flush(); /// Check whether this Map instance is currently attached to a /// memory mapped file. bool is_attached() const noexcept; @@ -834,18 +783,8 @@ class File::Map : private MapBase { return m_access_mode == access_ReadWrite; } -#if REALM_ENABLE_ENCRYPTION /// Get the encrypted file mapping corresponding to this mapping - inline EncryptedFileMapping* get_encrypted_mapping() const - { - return m_encrypted_mapping; - } -#else - inline EncryptedFileMapping* get_encrypted_mapping() const - { - return nullptr; - } -#endif + using MapBase::get_encrypted_mapping; friend class UnmapGuard; }; @@ -967,16 +906,6 @@ class DirScanner { // Implementation: -inline File::File(const std::string& path, Mode m) -{ - open(path, m); -} - -inline File::~File() noexcept -{ - close(); -} - inline void File::set_fifo_path(const std::string& fifo_dir_path, const std::string& fifo_file_name) { #ifdef REALM_FILELOCK_EMULATION @@ -988,49 +917,7 @@ inline void File::set_fifo_path(const std::string& fifo_dir_path, const std::str #endif } -inline File::File(File&& f) noexcept -{ -#ifdef _WIN32 - m_fd = f.m_fd; - f.m_fd = nullptr; -#else - m_fd = f.m_fd; -#ifdef REALM_FILELOCK_EMULATION - m_pipe_fd = f.m_pipe_fd; - m_has_exclusive_lock = f.m_has_exclusive_lock; - f.m_has_exclusive_lock = false; - f.m_pipe_fd = -1; -#endif - f.m_fd = -1; -#endif - m_have_lock = f.m_have_lock; - f.m_have_lock = false; - m_encryption_key = std::move(f.m_encryption_key); -} - -inline File& File::operator=(File&& f) noexcept -{ - close(); -#ifdef _WIN32 - m_fd = f.m_fd; - f.m_fd = nullptr; 
-#else - m_fd = f.m_fd; - f.m_fd = -1; -#ifdef REALM_FILELOCK_EMULATION - m_pipe_fd = f.m_pipe_fd; - f.m_pipe_fd = -1; - m_has_exclusive_lock = f.m_has_exclusive_lock; - f.m_has_exclusive_lock = false; -#endif -#endif - m_have_lock = f.m_have_lock; - f.m_have_lock = false; - m_encryption_key = std::move(f.m_encryption_key); - return *this; -} - -inline void File::open(const std::string& path, Mode m) +inline void File::open(std::string_view path, Mode m) { AccessMode a = access_ReadWrite; CreateMode c = create_Auto; @@ -1053,13 +940,13 @@ inline void File::open(const std::string& path, Mode m) open(path, a, c, flags); } -inline void File::open(const std::string& path, AccessMode am, CreateMode cm, int flags) +inline void File::open(std::string_view path, AccessMode am, CreateMode cm, int flags) { open_internal(path, am, cm, flags, nullptr); } -inline void File::open(const std::string& path, bool& was_created) +inline void File::open(std::string_view path, bool& was_created) { while (1) { bool success; @@ -1110,40 +997,28 @@ inline bool File::try_lock() return lock(true, true); } -inline File::MapBase::~MapBase() noexcept -{ - unmap(); -} - template -inline File::Map::Map(const File& f, AccessMode a, size_t size, int map_flags, util::WriteObserver* observer) +inline File::Map::Map(const File& f, AccessMode a, size_t size, util::WriteObserver* observer) { - map(f, a, size, map_flags, 0, observer); + map(f, a, size, 0, observer); } template -inline File::Map::Map(const File& f, size_t offset, AccessMode a, size_t size, int map_flags, - util::WriteObserver* observer) +inline File::Map::Map(const File& f, SizeType offset, AccessMode a, size_t size, util::WriteObserver* observer) { - map(f, a, size, map_flags, offset, observer); + map(f, a, size, offset, observer); } template -inline File::Map::Map() noexcept +inline T* File::Map::map(const File& f, AccessMode a, size_t size, SizeType offset, util::WriteObserver* observer) { -} - -template -inline T* File::Map::map(const 
File& f, AccessMode a, size_t size, int map_flags, size_t offset, - util::WriteObserver* observer) -{ - MapBase::map(f, a, size, map_flags, offset, observer); + MapBase::map(f, a, size, offset, observer); return static_cast(m_addr); } template -inline bool File::Map::try_reserve(const File& f, AccessMode a, size_t size, size_t offset, +inline bool File::Map::try_reserve(const File& f, AccessMode a, size_t size, SizeType offset, util::WriteObserver* observer) { return MapBase::try_reserve(f, a, size, offset, observer); @@ -1156,13 +1031,11 @@ inline void File::Map::unmap() noexcept } template -inline T* File::Map::remap(const File& f, AccessMode a, size_t size, int map_flags) +inline T* File::Map::remap(const File& f, AccessMode a, size_t size) { - // MapBase::remap(f, a, size, map_flags); // missing sync() here? unmap(); - map(f, a, size, map_flags); - + map(f, a, size); return static_cast(m_addr); } @@ -1172,18 +1045,6 @@ inline bool File::Map::try_extend_to(size_t size) noexcept return MapBase::try_extend_to(sizeof(T) * size); } -template -inline void File::Map::sync() -{ - MapBase::sync(); -} - -template -inline void File::Map::flush() -{ - MapBase::flush(); -} - template inline bool File::Map::is_attached() const noexcept { @@ -1261,8 +1122,10 @@ inline void File::Streambuf::flush() { size_t n = pptr() - pbase(); if (n > 0) { - m_file.write(pbase(), n); + SizeType pos = m_file.get_file_pos(); + m_file.write(pos, pbase(), n); setp(m_buffer.get(), epptr()); + m_file.seek(pos + n); } } @@ -1312,7 +1175,6 @@ inline bool operator>=(const File::UniqueID& lhs, const File::UniqueID& rhs) { return !(lhs < rhs); } - } // namespace realm::util #endif // REALM_UTIL_FILE_HPP diff --git a/src/realm/util/file_mapper.cpp b/src/realm/util/file_mapper.cpp index 38cbed71bcd..f4323cad7c2 100644 --- a/src/realm/util/file_mapper.cpp +++ b/src/realm/util/file_mapper.cpp @@ -16,9 +16,15 @@ * **************************************************************************/ +#include + 
+#include +#include +#include #include +#include -#include +#include #ifdef _WIN32 #include @@ -27,16 +33,11 @@ #include #endif -#include -#include -#include -#include -#include - #if REALM_ENABLE_ENCRYPTION #include #include +#include #include #include @@ -44,15 +45,7 @@ #include #include #include -#include -#include -#include #include - -#include -#include -#include -#include #include // for memset #if REALM_PLATFORM_APPLE @@ -62,572 +55,97 @@ #endif // enable encryption namespace { - inline bool is_mmap_memory_error(int err) { return (err == EAGAIN || err == EMFILE || err == ENOMEM); } - } // Unnamed namespace -using namespace realm; -using namespace realm::util; - -namespace realm { -namespace util { - +namespace realm::util { size_t round_up_to_page_size(size_t size) noexcept { return (size + page_size() - 1) & ~(page_size() - 1); } - -#if REALM_ENABLE_ENCRYPTION - -// A list of all of the active encrypted mappings for a single file -struct mappings_for_file { - File::UniqueID file_unique_id; - std::shared_ptr info; -}; - -// Group the information we need to map a SIGSEGV address to an -// EncryptedFileMapping for the sake of cache-friendliness with 3+ active -// mappings (and no worse with only two) -struct mapping_and_addr { - std::shared_ptr mapping; - void* addr; - size_t size; -}; - -util::Mutex& mapping_mutex = *(new util::Mutex); -namespace { -std::vector& mappings_by_addr = *new std::vector; -std::vector& mappings_by_file = *new std::vector; -static unsigned int file_reclaim_index = 0; -static std::atomic num_decrypted_pages(0); // this is for statistical purposes -static std::atomic reclaimer_target(0); // do. -static std::atomic reclaimer_workload(0); // do. 
-// helpers - -int64_t fetch_value_in_file(const std::string& fname, const char* scan_pattern) -{ - std::ifstream file(fname); - if (file) { - std::stringstream buffer; - buffer << file.rdbuf(); - - std::string s = buffer.str(); - std::smatch m; - std::regex e(scan_pattern); - - if (std::regex_search(s, m, e)) { - std::string ibuf = m[1]; - return strtol(ibuf.c_str(), nullptr, 10); - } - } - return PageReclaimGovernor::no_match; -} - -/* Default reclaim governor - * - */ - -class DefaultGovernor : public PageReclaimGovernor { -public: - static int64_t pick_lowest_valid(int64_t a, int64_t b) - { - if (a == PageReclaimGovernor::no_match) - return b; - if (b == PageReclaimGovernor::no_match) - return a; - return std::min(a, b); - } - - static int64_t pick_if_valid(int64_t source, int64_t target) - { - if (source == PageReclaimGovernor::no_match) - return PageReclaimGovernor::no_match; - return target; - } - - static int64_t get_target_from_system(const std::string& cfg_file_name) - { - int64_t target; - auto local_spec = fetch_value_in_file(cfg_file_name, "target ([[:digit:]]+)"); - if (local_spec != no_match) { // overrides everything! 
- target = local_spec; - } - else { - // no local spec, try to deduce something reasonable from platform info - auto from_proc = fetch_value_in_file("/proc/meminfo", "MemTotal:[[:space:]]+([[:digit:]]+) kB") * 1024; - auto from_cgroup = fetch_value_in_file("/sys/fs/cgroup/memory/memory.limit_in_bytes", "^([[:digit:]]+)"); - auto cache_use = fetch_value_in_file("/sys/fs/cgroup/memory/memory.stat", "cache ([[:digit:]]+)"); - target = pick_if_valid(from_proc, from_proc / 4); - target = pick_lowest_valid(target, pick_if_valid(from_cgroup, from_cgroup / 4)); - target = pick_lowest_valid(target, pick_if_valid(cache_use, cache_use)); - } - return target; - } - - util::UniqueFunction current_target_getter(size_t load) override - { - static_cast(load); - if (m_refresh_count > 0) { - --m_refresh_count; - return [target = m_target] { - return target; - }; - } - m_refresh_count = 10; - - return [file_name = m_cfg_file_name] { - return get_target_from_system(file_name); - }; - } - - void report_target_result(int64_t target) override - { - m_target = target; - } - - DefaultGovernor() - { - auto cfg_name = getenv("REALM_PAGE_GOVERNOR_CFG"); - if (cfg_name) { - m_cfg_file_name = cfg_name; - } - } - -private: - std::string m_cfg_file_name; - int64_t m_target = 0; - int m_refresh_count = 0; -}; - -static DefaultGovernor default_governor; -static PageReclaimGovernor* governor = &default_governor; - -void reclaim_pages(); - -#if !REALM_PLATFORM_APPLE -static std::atomic reclaimer_shutdown(false); -static std::unique_ptr reclaimer_thread; - -static void ensure_reclaimer_thread_runs() -{ - if (reclaimer_thread == nullptr) { - reclaimer_thread = std::make_unique([] { - while (!reclaimer_shutdown) { - reclaim_pages(); - millisleep(1000); - } - }); - } -} - -struct ReclaimerThreadStopper { - ~ReclaimerThreadStopper() - { - if (reclaimer_thread) { - reclaimer_shutdown = true; - reclaimer_thread->join(); - } - } -} reclaimer_thread_stopper; -#else // REALM_PLATFORM_APPLE -static 
dispatch_source_t reclaimer_timer; -static dispatch_queue_t reclaimer_queue; - -static void ensure_reclaimer_thread_runs() +void* mmap(const FileAttributes& file, size_t size, uint64_t offset, std::unique_ptr& mapping) { - if (!reclaimer_timer) { - reclaimer_queue = dispatch_queue_create_with_target("io.realm.page-reclaimer", DISPATCH_QUEUE_SERIAL, - dispatch_get_global_queue(QOS_CLASS_BACKGROUND, 0)); - reclaimer_timer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, reclaimer_queue); - dispatch_source_set_timer(reclaimer_timer, DISPATCH_TIME_NOW, NSEC_PER_SEC, NSEC_PER_SEC); - dispatch_source_set_event_handler(reclaimer_timer, ^{ - reclaim_pages(); - }); - dispatch_resume(reclaimer_timer); - } -} - -struct ReclaimerThreadStopper { - ~ReclaimerThreadStopper() - { - if (reclaimer_timer) { - dispatch_source_cancel(reclaimer_timer); - // Block until any currently-running timer tasks are done - dispatch_sync(reclaimer_queue, ^{ - }); - dispatch_release(reclaimer_timer); - dispatch_release(reclaimer_queue); - } +#if REALM_ENABLE_ENCRYPTION + _impl::SimulatedFailure::trigger_mmap(size); + if (file.encryption) { + auto page_start = offset & ~(page_size() - 1); + size += offset - page_start; + size = round_up_to_page_size(size); + void* addr = mmap_anon(size); + mapping = file.encryption->add_mapping(page_start, addr, size, file.access); // FIXME: leaks if throws + return static_cast(addr) - page_start + offset; } -} reclaimer_thread_stopper; + mapping = nullptr; +#else + static_cast(mapping); #endif -} // anonymous namespace - -void set_page_reclaim_governor(PageReclaimGovernor* new_governor) -{ - UniqueLock lock(mapping_mutex); - governor = new_governor ? 
new_governor : &default_governor; - ensure_reclaimer_thread_runs(); -} - -size_t get_num_decrypted_pages() -{ - return num_decrypted_pages.load(); -} - -void encryption_note_reader_start(SharedFileInfo& info, const void* reader_id) -{ - UniqueLock lock(mapping_mutex); - ensure_reclaimer_thread_runs(); - auto j = std::find_if(info.readers.begin(), info.readers.end(), [=](auto& reader) { - return reader.reader_ID == reader_id; - }); - if (j == info.readers.end()) { - ReaderInfo i = {reader_id, info.current_version}; - info.readers.push_back(i); - } - else { - j->version = info.current_version; - } - ++info.current_version; -} - -void encryption_note_reader_end(SharedFileInfo& info, const void* reader_id) noexcept -{ - UniqueLock lock(mapping_mutex); - for (auto j = info.readers.begin(); j != info.readers.end(); ++j) - if (j->reader_ID == reader_id) { - // move last over - *j = info.readers.back(); - info.readers.pop_back(); - return; - } -} - -void encryption_mark_pages_for_IV_check(EncryptedFileMapping* mapping) -{ - UniqueLock lock(mapping_mutex); - mapping->mark_pages_for_IV_check(); -} - -namespace { -size_t collect_total_workload() // must be called under lock -{ - size_t total = 0; - for (auto i = mappings_by_file.begin(); i != mappings_by_file.end(); ++i) { - SharedFileInfo& info = *i->info; - info.num_decrypted_pages = 0; - for (auto it = info.mappings.begin(); it != info.mappings.end(); ++it) { - info.num_decrypted_pages += (*it)->collect_decryption_count(); - } - total += info.num_decrypted_pages; - } - return total; -} - -/* Compute the amount of work allowed in an attempt to reclaim pages. - * please refer to EncryptedFileMapping::reclaim_untouched() for more details. - * - * The function starts slowly when the load is 0.5 of target, then turns - * up the volume as the load nears 1.0 - where it sets a work limit of 10%. 
- * Since the work is expressed (roughly) in terms of pages released, this means - * that about 10 runs has to take place to reclaim all pages possible - though - * if successful the load will rapidly decrease, turning down the work limit. - */ - -struct work_limit_desc { - float base; - float effort; -}; -const std::vector control_table = {{0.5f, 0.001f}, {0.75f, 0.002f}, {0.8f, 0.003f}, - {0.85f, 0.005f}, {0.9f, 0.01f}, {0.95f, 0.03f}, - {1.0f, 0.1f}, {1.5f, 0.2f}, {2.0f, 0.3f}}; - -size_t get_work_limit(size_t decrypted_pages, size_t target) -{ - if (target == 0) - target = 1; - float load = 1.0f * decrypted_pages / target; - float akku = 0.0f; - for (const auto& e : control_table) { - if (load <= e.base) - break; - akku += (load - e.base) * e.effort; - } - size_t work_limit = size_t(target * akku); - return work_limit; -} -/* Find the oldest version that is still of interest to somebody */ -uint64_t get_oldest_version(SharedFileInfo& info) // must be called under lock -{ - auto oldest_version = info.current_version; - for (const auto& e : info.readers) { - if (e.version < oldest_version) { - oldest_version = e.version; - } - } - return oldest_version; -} - -// Reclaim pages for ONE file, limited by a given work limit. -void reclaim_pages_for_file(SharedFileInfo& info, size_t& work_limit) -{ - uint64_t oldest_version = get_oldest_version(info); - if (info.last_scanned_version < oldest_version || info.mappings.empty()) { - // locate the mapping matching the progress index. 
No such mapping may - // exist, and if so, we'll update the index to the next mapping - for (auto& e : info.mappings) { - auto start_index = e->get_start_index(); - if (info.progress_index < start_index) { - info.progress_index = start_index; - } - if (info.progress_index <= e->get_end_index()) { - e->reclaim_untouched(info.progress_index, work_limit); - if (work_limit == 0) - return; - } - } - // if we get here, all mappings have been considered - info.progress_index = 0; - info.last_scanned_version = info.current_version; - ++info.current_version; - } -} - -// Reclaim pages from all files, limited by a work limit that is derived -// from a target for the amount of dirty (decrypted) pages. The target is -// set by the governor function. -void reclaim_pages() -{ - size_t load; - util::UniqueFunction runnable; - { - UniqueLock lock(mapping_mutex); - load = collect_total_workload(); - num_decrypted_pages = load; - runnable = governor->current_target_getter(load * page_size()); - } - // callback to governor defined function without mutex held - int64_t target = PageReclaimGovernor::no_match; - if (runnable) { - target = runnable(); - } - { - UniqueLock lock(mapping_mutex); - reclaimer_workload = 0; - reclaimer_target = size_t(target / page_size()); - // Putting the target back into the govenor object will allow the govenor - // to return a getter producing this value again next time it is called - governor->report_target_result(target); - - if (target == PageReclaimGovernor::no_match) // temporarily disabled by governor returning no_match - return; - - if (mappings_by_file.size() == 0) - return; - - size_t work_limit = get_work_limit(load, reclaimer_target); - reclaimer_workload = work_limit; - if (file_reclaim_index >= mappings_by_file.size()) - file_reclaim_index = 0; - - while (work_limit > 0) { - SharedFileInfo& info = *mappings_by_file[file_reclaim_index].info; - reclaim_pages_for_file(info, work_limit); - if (work_limit > 0) { // consider next file: - 
++file_reclaim_index; - if (file_reclaim_index >= mappings_by_file.size()) - return; - } - } - } -} - - -mapping_and_addr* find_mapping_for_addr(void* addr, size_t size) -{ - for (size_t i = 0; i < mappings_by_addr.size(); ++i) { - mapping_and_addr& m = mappings_by_addr[i]; - if (m.addr == addr && m.size == size) - return &m; - REALM_ASSERT(m.addr != addr); - } - - return 0; -} -} // anonymous namespace - -SharedFileInfo* get_file_info_for_file(File& file) -{ - LockGuard lock(mapping_mutex); - File::UniqueID id = file.get_unique_id(); - std::vector::iterator it; - for (it = mappings_by_file.begin(); it != mappings_by_file.end(); ++it) { - if (it->file_unique_id == id) { +#ifndef _WIN32 + int prot = PROT_READ; + switch (file.access) { + case File::access_ReadWrite: + prot |= PROT_WRITE; break; - } - } - if (it == mappings_by_file.end()) - return nullptr; - else - return it->info.get(); -} - -namespace { -EncryptedFileMapping* add_mapping(void* addr, size_t size, const FileAttributes& file, size_t file_offset) -{ - size_t fs = to_size_t(File::get_size_static(file.fd)); - if (fs > 0 && fs < c_min_encrypted_file_size) - throw DecryptionFailed( - util::format("file size %1 is less than the minimum encrypted file size of %2 for '%3'", fs, - c_min_encrypted_file_size, file.path)); - - LockGuard lock(mapping_mutex); - - File::UniqueID fuid = File::get_unique_id(file.fd, file.path); - - std::vector::iterator it; - for (it = mappings_by_file.begin(); it != mappings_by_file.end(); ++it) { - if (it->file_unique_id == fuid) { + case File::access_ReadOnly: break; - } } - // Get the potential memory allocation out of the way so that mappings_by_addr.push_back can't throw - mappings_by_addr.reserve(mappings_by_addr.size() + 1); - - if (it == mappings_by_file.end()) { - mappings_by_file.reserve(mappings_by_file.size() + 1); - mappings_for_file f; - f.info = std::make_shared(reinterpret_cast(file.encryption_key)); - f.info->fd = File::dup_file_desc(file.fd); - f.file_unique_id = 
fuid; + void* addr = ::mmap(nullptr, size, prot, MAP_SHARED, file.fd, offset); + if (addr != MAP_FAILED) + return addr; - mappings_by_file.push_back(f); // can't throw due to reserve() above - it = mappings_by_file.end() - 1; - } - else { - it->info->cryptor.check_key(reinterpret_cast(file.encryption_key)); + int err = errno; // Eliminate any risk of clobbering + if (is_mmap_memory_error(err)) { + throw AddressSpaceExhausted(util::format("mmap() failed: %1 (size: %2, offset: %3)", + make_basic_system_error_code(err).message(), size, offset)); } - try { - mapping_and_addr m; - m.addr = addr; - m.size = size; - m.mapping = std::make_shared(*it->info, file_offset, addr, size, file.access); - mappings_by_addr.push_back(m); // can't throw due to reserve() above - return m.mapping.get(); - } - catch (...) { - if (it->info->mappings.empty()) { - FileDesc fd_to_close = it->info->fd; - mappings_by_file.erase(it); - File::close_static(fd_to_close); // Throws - } - throw; - } -} + throw SystemError(err, util::format("mmap() failed (size: %1, offset: %2", size, offset)); -void remove_mapping(void* addr, size_t size) -{ - size = round_up_to_page_size(size); - LockGuard lock(mapping_mutex); - mapping_and_addr* m = find_mapping_for_addr(addr, size); - if (!m) - return; - - mappings_by_addr.erase(mappings_by_addr.begin() + (m - &mappings_by_addr[0])); - - for (std::vector::iterator it = mappings_by_file.begin(); it != mappings_by_file.end(); ++it) { - if (it->info->mappings.empty()) { - FileDesc fd_to_close = it->info->fd; - mappings_by_file.erase(it); - File::close_static(fd_to_close); // Throws +#else + DWORD protect = PAGE_READONLY; + DWORD desired_access = FILE_MAP_READ; + switch (file.access) { + case File::access_ReadOnly: + break; + case File::access_ReadWrite: + protect = PAGE_READWRITE; + desired_access = FILE_MAP_WRITE; break; - } - } -} -} // anonymous namespace - -void* mmap(const FileAttributes& file, size_t size, size_t offset, EncryptedFileMapping*& mapping) -{ - 
_impl::SimulatedFailure::trigger_mmap(size); - if (file.encryption_key) { - size = round_up_to_page_size(size); - void* addr = mmap_anon(size); - mapping = add_mapping(addr, size, file, offset); - return addr; - } - else { - mapping = nullptr; - return mmap(file, size, offset); } -} - - -EncryptedFileMapping* reserve_mapping(void* addr, const FileAttributes& file, size_t offset) -{ - return add_mapping(addr, 0, file, offset); -} - -void extend_encrypted_mapping(EncryptedFileMapping* mapping, void* addr, size_t offset, size_t old_size, - size_t new_size) -{ - LockGuard lock(mapping_mutex); - auto m = find_mapping_for_addr(addr, old_size); - REALM_ASSERT(m); - m->size = new_size; - mapping->extend_to(offset, new_size); -} - -void remove_encrypted_mapping(void* addr, size_t size) -{ - remove_mapping(addr, size); -} + LARGE_INTEGER large_int; + if (int_cast_with_overflow_detect(offset + size, large_int.QuadPart)) + throw std::runtime_error("Map size is too large"); + HANDLE map_handle = CreateFileMappingFromApp(file.fd, 0, protect, offset + size, nullptr); + if (!map_handle) + throw AddressSpaceExhausted(get_errno_msg("CreateFileMapping() failed: ", GetLastError()) + + " size: " + util::to_string(size) + " offset: " + util::to_string(offset)); + + if (int_cast_with_overflow_detect(offset, large_int.QuadPart)) + throw RuntimeError(ErrorCodes::RangeError, "Map offset is too large"); + + SIZE_T _size = size; + void* addr = MapViewOfFileFromApp(map_handle, desired_access, offset, _size); + BOOL r = CloseHandle(map_handle); + REALM_ASSERT_RELEASE(r); + if (!addr) + throw AddressSpaceExhausted(get_errno_msg("MapViewOfFileFromApp() failed: ", GetLastError()) + + " size: " + util::to_string(_size) + " offset: " + util::to_string(offset)); -void* mmap_reserve(const FileAttributes& file, size_t reservation_size, size_t offset_in_file, - EncryptedFileMapping*& mapping) -{ - auto addr = mmap_reserve(file.fd, reservation_size, offset_in_file); - if (file.encryption_key) { - 
REALM_ASSERT(reservation_size == round_up_to_page_size(reservation_size)); - // we create a mapping for the entire reserved area. This causes full initialization of some fairly - // large std::vectors, which it would be nice to avoid. This is left as a future optimization. - mapping = add_mapping(addr, reservation_size, file, offset_in_file); - } - else { - mapping = nullptr; - } return addr; +#endif } -void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, size_t offset, - const char* enc_key, EncryptedFileMapping* encrypted_mapping) +#if REALM_ENABLE_ENCRYPTION +std::unique_ptr reserve_mapping(void* addr, const FileAttributes& file, uint64_t offset) { - REALM_ASSERT((enc_key == nullptr) == - (encrypted_mapping == nullptr)); // Mapping must already have been set if encryption is used - if (encrypted_mapping) { -// Since the encryption layer must be able to WRITE into the memory area, -// we have to map it read/write regardless of the request. -// FIXME: Make this work for windows! 
-#ifdef _WIN32 - return nullptr; -#else - return ::mmap(address_request, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); -#endif - } - else { - return mmap_fixed(fd, address_request, size, access, offset, enc_key); - } + return file.encryption->add_mapping(offset, addr, 0, file.access); } - #endif // REALM_ENABLE_ENCRYPTION void* mmap_anon(size_t size) @@ -665,15 +183,10 @@ void* mmap_anon(size_t size) #endif } -void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, size_t offset, - const char* enc_key) +#ifndef _WIN32 +void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, uint64_t offset) { _impl::SimulatedFailure::trigger_mmap(size); - static_cast(enc_key); // FIXME: Consider removing this parameter -#ifdef _WIN32 - REALM_ASSERT(false); - return nullptr; // silence warning -#else auto prot = PROT_READ; if (access == File::access_ReadWrite) prot |= PROT_WRITE; @@ -683,111 +196,15 @@ void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMo ", when mapping an already reserved memory area"); } return addr; -#endif } +#endif // _WIN32 -void* mmap_reserve(FileDesc fd, size_t reservation_size, size_t offset_in_file) -{ - // The other mmap operations take an fd as a parameter, so we do too. - // We're not using it for anything currently, but this may change. - // Similarly for offset_in_file. 
- static_cast(fd); - static_cast(offset_in_file); -#ifdef _WIN32 - REALM_ASSERT(false); // unsupported on windows - return nullptr; -#else - auto addr = ::mmap(0, reservation_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - throw std::runtime_error(get_errno_msg("mmap() failed: ", errno)); - } - return addr; -#endif -} - - -void* mmap(const FileAttributes& file, size_t size, size_t offset) -{ - _impl::SimulatedFailure::trigger_mmap(size); -#if REALM_ENABLE_ENCRYPTION - if (file.encryption_key) { - size = round_up_to_page_size(size); - void* addr = mmap_anon(size); - add_mapping(addr, size, file, offset); - return addr; - } - else -#else - REALM_ASSERT(!file.encryption_key); -#endif - { - -#ifndef _WIN32 - int prot = PROT_READ; - switch (file.access) { - case File::access_ReadWrite: - prot |= PROT_WRITE; - break; - case File::access_ReadOnly: - break; - } - - void* addr = ::mmap(nullptr, size, prot, MAP_SHARED, file.fd, offset); - if (addr != MAP_FAILED) - return addr; - - int err = errno; // Eliminate any risk of clobbering - if (is_mmap_memory_error(err)) { - throw AddressSpaceExhausted(get_errno_msg("mmap() failed: ", err) + " size: " + util::to_string(size) + - " offset: " + util::to_string(offset)); - } - - throw SystemError(err, std::string("mmap() failed (size: ") + util::to_string(size) + - ", offset: " + util::to_string(offset)); - -#else - // FIXME: Is there anything that we must do on Windows to honor map_NoSync? 
- - DWORD protect = PAGE_READONLY; - DWORD desired_access = FILE_MAP_READ; - switch (file.access) { - case File::access_ReadOnly: - break; - case File::access_ReadWrite: - protect = PAGE_READWRITE; - desired_access = FILE_MAP_WRITE; - break; - } - LARGE_INTEGER large_int; - if (int_cast_with_overflow_detect(offset + size, large_int.QuadPart)) - throw std::runtime_error("Map size is too large"); - HANDLE map_handle = CreateFileMappingFromApp(file.fd, 0, protect, offset + size, nullptr); - if (!map_handle) - throw AddressSpaceExhausted(get_errno_msg("CreateFileMapping() failed: ", GetLastError()) + - " size: " + util::to_string(size) + " offset: " + util::to_string(offset)); - - if (int_cast_with_overflow_detect(offset, large_int.QuadPart)) - throw RuntimeError(ErrorCodes::RangeError, "Map offset is too large"); - - SIZE_T _size = size; - void* addr = MapViewOfFileFromApp(map_handle, desired_access, offset, _size); - BOOL r = CloseHandle(map_handle); - REALM_ASSERT_RELEASE(r); - if (!addr) - throw AddressSpaceExhausted(get_errno_msg("MapViewOfFileFromApp() failed: ", GetLastError()) + - " size: " + util::to_string(_size) + " offset: " + util::to_string(offset)); - - return addr; -#endif - } -} void munmap(void* addr, size_t size) { -#if REALM_ENABLE_ENCRYPTION - remove_mapping(addr, size); -#endif - + auto shift = reinterpret_cast(addr) & (page_size() - 1); + addr = static_cast(addr) - shift; + size += shift; #ifdef _WIN32 if (!UnmapViewOfFile(addr)) throw std::system_error(GetLastError(), std::system_category(), "UnmapViewOfFile() failed"); @@ -800,102 +217,8 @@ void munmap(void* addr, size_t size) #endif } -void* mremap(const FileAttributes& file, size_t file_offset, void* old_addr, size_t old_size, size_t new_size) -{ -#if REALM_ENABLE_ENCRYPTION - if (file.encryption_key) { - LockGuard lock(mapping_mutex); - size_t rounded_old_size = round_up_to_page_size(old_size); - if (mapping_and_addr* m = find_mapping_for_addr(old_addr, rounded_old_size)) { - size_t 
rounded_new_size = round_up_to_page_size(new_size); - if (rounded_old_size == rounded_new_size) - return old_addr; - - void* new_addr = mmap_anon(rounded_new_size); - m->mapping->set(new_addr, rounded_new_size, file_offset); - m->addr = new_addr; - m->size = rounded_new_size; -#ifdef _WIN32 - if (!UnmapViewOfFile(old_addr)) - throw std::system_error(GetLastError(), std::system_category(), "UnmapViewOfFile() failed"); -#else - if (::munmap(old_addr, rounded_old_size)) { - int err = errno; - throw std::system_error(err, std::system_category(), "munmap() failed"); - } -#endif - return new_addr; - } - // If we are using encryption, we must have used mmap and the mapping - // must have been added to the cache therefore find_mapping_for_addr() - // will succeed. Otherwise we would continue to mmap it below without - // the encryption key which is an error. - REALM_UNREACHABLE(); - } -#endif - -#ifdef _GNU_SOURCE - { - void* new_addr = ::mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE); - if (new_addr != MAP_FAILED) - return new_addr; - int err = errno; // Eliminate any risk of clobbering - // Do not throw here if mremap is declared as "not supported" by the - // platform Eg. When compiling with GNU libc on OSX, iOS. - // In this case fall through to no-mremap case below. 
- if (err != ENOTSUP && err != ENOSYS) { - if (is_mmap_memory_error(err)) { - throw AddressSpaceExhausted(get_errno_msg("mremap() failed: ", err) + " old size: " + - util::to_string(old_size) + " new size: " + util::to_string(new_size)); - } - throw std::system_error(err, std::system_category(), - std::string("_gnu_src mmap() failed (") + "old_size: " + - util::to_string(old_size) + ", new_size: " + util::to_string(new_size) + ")"); - } - } -#endif - - void* new_addr = mmap(file, new_size, file_offset); - -#ifdef _WIN32 - if (!UnmapViewOfFile(old_addr)) - throw std::system_error(GetLastError(), std::system_category(), "UnmapViewOfFile() failed"); -#else - if (::munmap(old_addr, old_size) != 0) { - int err = errno; - throw std::system_error(err, std::system_category(), "munmap() failed"); - } -#endif - - return new_addr; -} - void msync(FileDesc fd, void* addr, size_t size) { -#if REALM_ENABLE_ENCRYPTION - { - // first check the encrypted mappings - LockGuard lock(mapping_mutex); - if (mapping_and_addr* m = find_mapping_for_addr(addr, round_up_to_page_size(size))) { - m->mapping->flush(); - m->mapping->sync(); - return; - } - } -#endif - - // not an encrypted mapping - - // FIXME: on iOS/OSX fsync may not be enough to ensure crash safety. - // Consider adding fcntl(F_FULLFSYNC). This most likely also applies to msync. - // - // See description of fsync on iOS here: - // https://developer.apple.com/library/ios/documentation/System/Conceptual/ManPages_iPhoneOS/man2/fsync.2.html - // - // See also - // https://developer.apple.com/library/ios/documentation/Cocoa/Conceptual/CoreData/Articles/cdPersistentStores.html - // for a discussion of this related to core data. 
- #ifdef _WIN32 // FlushViewOfFile() is asynchronous and won't flush metadata (file size, etc) if (!FlushViewOfFile(addr, size)) { @@ -918,5 +241,22 @@ void msync(FileDesc fd, void* addr, size_t size) } #endif } -} // namespace util -} // namespace realm + +#if REALM_ENABLE_ENCRYPTION +void do_encryption_read_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping, bool to_modify) +{ + mapping->read_barrier(addr, size, to_modify); +} + +void do_encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping) +{ + mapping->write_barrier(addr, size); +} + +EncryptedFile::EncryptedFile(const char* key) + : cryptor(key) +{ +} +#endif // REALM_ENABLE_ENCRYPTION + +} // namespace realm::util diff --git a/src/realm/util/file_mapper.hpp b/src/realm/util/file_mapper.hpp index d4cd667823d..9842a986a55 100644 --- a/src/realm/util/file_mapper.hpp +++ b/src/realm/util/file_mapper.hpp @@ -21,169 +21,62 @@ #include #include -#include -#include -#include - -#include -#include - -namespace realm { -namespace util { +namespace realm::util { struct FileAttributes { FileDesc fd; - std::string path; File::AccessMode access; - const char* encryption_key = nullptr; + EncryptedFile* encryption; }; -void* mmap(const FileAttributes& file, size_t size, size_t offset); -void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, size_t offset, - const char* enc_key); -void* mmap_reserve(FileDesc fd, size_t size, size_t offset); +class EncryptedFileMapping; + +void* mmap(const FileAttributes& file, size_t size, uint64_t offset, std::unique_ptr& mapping); +void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, uint64_t offset); void munmap(void* addr, size_t size); -void* mremap(const FileAttributes& file, size_t file_offset, void* old_addr, size_t old_size, size_t new_size); void msync(FileDesc fd, void* addr, size_t size); void* mmap_anon(size_t size); -// A function which may be given to 
encryption_read_barrier. If present, the read barrier is a -// a barrier for a full array. If absent, the read barrier is a barrier only for the address -// range give as argument. If the barrier is for a full array, it will read the array header -// and determine the address range from the header. -using HeaderToSize = size_t (*)(const char* addr); -class EncryptedFileMapping; - -class PageReclaimGovernor { -public: - // Called by the page reclaimer with the current load (in bytes) and - // must return the target load (also in bytes). Returns no_match if no - // target can be set - static constexpr int64_t no_match = -1; - virtual util::UniqueFunction current_target_getter(size_t load) = 0; - virtual void report_target_result(int64_t) = 0; -}; - -// Set a page reclaim governor. The governor is an object with a method which will be called periodically -// and must return a 'target' amount of memory to hold decrypted pages. The page reclaim daemon -// will then try to release pages to meet the target. The governor is called with the current -// amount of data used, for the purpose of logging - or possibly for computing the target -// -// The governor is called approximately once per second. -// -// If no governor is installed, the page reclaim daemon will not start. -void set_page_reclaim_governor(PageReclaimGovernor* governor); - -// Use the default governor. The default governor is used automatically if nothing else is set, so -// this funciton is mostly useful for tests where changing back to the default could be desirable. -inline void set_page_reclaim_governor_to_default() -{ - set_page_reclaim_governor(nullptr); -} - -// Retrieves the number of in memory decrypted pages, across all open files. 
-size_t get_num_decrypted_pages(); - #if REALM_ENABLE_ENCRYPTION -void encryption_note_reader_start(SharedFileInfo& info, const void* reader_id); -void encryption_note_reader_end(SharedFileInfo& info, const void* reader_id) noexcept; - -SharedFileInfo* get_file_info_for_file(File& file); +void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, uint64_t offset); -// This variant allows the caller to obtain direct access to the encrypted file mapping -// for optimization purposes. -void* mmap(const FileAttributes& file, size_t size, size_t offset, EncryptedFileMapping*& mapping); -void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, size_t offset, - const char* enc_key, EncryptedFileMapping* mapping); +std::unique_ptr reserve_mapping(void* addr, const FileAttributes& file, uint64_t offset); -void* mmap_reserve(const FileAttributes& file, size_t size, size_t offset, EncryptedFileMapping*& mapping); +void do_encryption_read_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping, bool to_modify); +void do_encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping); -EncryptedFileMapping* reserve_mapping(void* addr, const FileAttributes& file, size_t offset); +#else -void extend_encrypted_mapping(EncryptedFileMapping* mapping, void* addr, size_t offset, size_t old_size, - size_t new_size); -void remove_encrypted_mapping(void* addr, size_t size); -void do_encryption_read_barrier(const void* addr, size_t size, HeaderToSize header_to_size, - EncryptedFileMapping* mapping, bool to_modify); +inline void do_encryption_read_barrier(const void*, size_t, EncryptedFileMapping*, bool) {} +inline void do_encryption_write_barrier(const void*, size_t, EncryptedFileMapping*) {} -void do_encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping); +#endif -void inline encryption_read_barrier(const void* addr, size_t size, EncryptedFileMapping* 
mapping, - HeaderToSize header_to_size = nullptr, bool to_modify = false) +inline void encryption_read_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping) { if (REALM_UNLIKELY(mapping)) - do_encryption_read_barrier(addr, size, header_to_size, mapping, to_modify); + do_encryption_read_barrier(addr, size, mapping, false); } -void inline encryption_read_barrier_for_write(const void* addr, size_t size, EncryptedFileMapping* mapping) +inline void encryption_read_barrier_for_write(const void* addr, size_t size, EncryptedFileMapping* mapping) { if (REALM_UNLIKELY(mapping)) - do_encryption_read_barrier(addr, size, nullptr, mapping, true); + do_encryption_read_barrier(addr, size, mapping, true); } -void inline encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping) +inline void encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping) { if (REALM_UNLIKELY(mapping)) do_encryption_write_barrier(addr, size, mapping); } - -extern util::Mutex& mapping_mutex; - -void inline encryption_flush(EncryptedFileMapping* mapping) -{ - UniqueLock lock(mapping_mutex); - mapping->flush(); -} - -inline void do_encryption_read_barrier(const void* addr, size_t size, HeaderToSize header_to_size, - EncryptedFileMapping* mapping, bool to_modify) -{ - UniqueLock lock(mapping_mutex); - mapping->read_barrier(addr, size, header_to_size, to_modify); -} - -inline void do_encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping) -{ - LockGuard lock(mapping_mutex); - mapping->write_barrier(addr, size); -} - -#else - - -size_t inline get_num_decrypted_pages() -{ - return 0; -} - -void inline set_page_reclaim_governor(PageReclaimGovernor*) {} -void inline encryption_read_barrier(const void*, size_t, EncryptedFileMapping*, HeaderToSize = nullptr) {} -void inline encryption_read_barrier_for_write(const void*, size_t, EncryptedFileMapping*) {} -void inline encryption_write_barrier(const void*, size_t) {} 
-void inline encryption_write_barrier(const void*, size_t, EncryptedFileMapping*) {} -void inline do_encryption_read_barrier(const void*, size_t, HeaderToSize, EncryptedFileMapping*, bool) {} -void inline do_encryption_write_barrier(const void*, size_t, EncryptedFileMapping*) {} - -#endif - // helpers for encrypted Maps template void encryption_read_barrier(const File::Map& map, size_t index, size_t num_elements = 1) { if (auto mapping = map.get_encrypted_mapping(); REALM_UNLIKELY(mapping)) { - do_encryption_read_barrier(map.get_addr() + index, sizeof(T) * num_elements, nullptr, mapping, - map.is_writeable()); - } -} - -template -void encryption_read_barrier_for_write(const File::Map& map, size_t index, size_t num_elements = 1) -{ - if (auto mapping = map.get_encrypted_mapping(); REALM_UNLIKELY(mapping)) { - do_encryption_read_barrier(map.get_addr() + index, sizeof(T) * num_elements, nullptr, mapping, - map.is_writeable()); + do_encryption_read_barrier(map.get_addr() + index, sizeof(T) * num_elements, mapping, map.is_writeable()); } } @@ -194,13 +87,10 @@ void encryption_write_barrier(const File::Map& map, size_t index, size_t num_ do_encryption_write_barrier(map.get_addr() + index, sizeof(T) * num_elements, mapping); } } -void encryption_mark_pages_for_IV_check(EncryptedFileMapping* mapping); File::SizeType encrypted_size_to_data_size(File::SizeType size) noexcept; File::SizeType data_size_to_encrypted_size(File::SizeType size) noexcept; size_t round_up_to_page_size(size_t size) noexcept; - -} // namespace util -} // namespace realm +} // namespace realm::util #endif diff --git a/src/realm/util/load_file.cpp b/src/realm/util/load_file.cpp index 5eb79a444fe..8c0d2d9a36c 100644 --- a/src/realm/util/load_file.cpp +++ b/src/realm/util/load_file.cpp @@ -13,19 +13,10 @@ std::string util::load_file(const std::string& path) for (;;) { std::size_t min_extra_capacity = 256; buffer.reserve_extra(used_size, min_extra_capacity); // Throws - std::size_t n = 
file.read(buffer.data() + used_size, buffer.size() - used_size); // Throws + std::size_t n = file.read(used_size, buffer.data() + used_size, buffer.size() - used_size); // Throws if (n == 0) break; used_size += n; } return std::string(buffer.data(), used_size); // Throws } - - -std::string util::load_file_and_chomp(const std::string& path) -{ - std::string contents = load_file(path); // Throws - if (!contents.empty() && contents.back() == '\n') - contents.pop_back(); - return contents; -} diff --git a/src/realm/util/load_file.hpp b/src/realm/util/load_file.hpp index c12613a5d07..fe41bec980a 100644 --- a/src/realm/util/load_file.hpp +++ b/src/realm/util/load_file.hpp @@ -9,7 +9,6 @@ namespace util { // FIXME: These functions ought to be moved to in the // realm-core repository. std::string load_file(const std::string& path); -std::string load_file_and_chomp(const std::string& path); } // namespace util } // namespace realm diff --git a/src/realm/util/safe_int_ops.hpp b/src/realm/util/safe_int_ops.hpp index 659249716fc..f92af37b18a 100644 --- a/src/realm/util/safe_int_ops.hpp +++ b/src/realm/util/safe_int_ops.hpp @@ -25,11 +25,11 @@ #include #endif -#include - #include #include +#include + namespace realm { namespace util { @@ -55,17 +55,17 @@ namespace util { /// integers. 
template -inline bool int_equal_to(A, B) noexcept; +constexpr bool int_equal_to(A, B) noexcept; template -inline bool int_not_equal_to(A, B) noexcept; +constexpr bool int_not_equal_to(A, B) noexcept; template -inline bool int_less_than(A, B) noexcept; +constexpr bool int_less_than(A, B) noexcept; template -inline bool int_less_than_or_equal(A, B) noexcept; +constexpr bool int_less_than_or_equal(A, B) noexcept; template -inline bool int_greater_than(A, B) noexcept; +constexpr bool int_greater_than(A, B) noexcept; template -inline bool int_greater_than_or_equal(A, B) noexcept; +constexpr bool int_greater_than_or_equal(A, B) noexcept; //@} @@ -89,10 +89,10 @@ inline bool int_greater_than_or_equal(A, B) noexcept; /// integers. template -inline bool int_add_with_overflow_detect(L& lval, R rval) noexcept; +constexpr bool int_add_with_overflow_detect(L& lval, R rval) noexcept; template -inline bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept; +constexpr bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept; //@} @@ -113,7 +113,7 @@ inline bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept; /// specializations of std::numeric_limits<> and that both are indeed /// integers. template -inline bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept; +constexpr bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept; /// Checks for positive overflow when performing a bitwise shift to @@ -128,7 +128,7 @@ inline bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept; /// value of i must not exceed the number of bits of storage type T as /// shifting by this amount is not defined by the standard. template -inline bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept; +constexpr bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept; //@{ @@ -146,10 +146,10 @@ inline bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept; /// except that it complies with at least C++03. 
template -bool int_cast_has_overflow(From from) noexcept; +constexpr bool int_cast_has_overflow(From from) noexcept; template -bool int_cast_with_overflow_detect(From from, To& to) noexcept; +constexpr bool int_cast_with_overflow_detect(From from, To& to) noexcept; //@} @@ -164,11 +164,11 @@ struct SafeIntBinopsImpl; template struct SafeIntBinopsImpl == std::is_signed_v>> { using common = std::common_type_t; - static bool equal(L l, R r) noexcept + constexpr static bool equal(L l, R r) noexcept { return common(l) == common(r); } - static bool less(L l, R r) noexcept + constexpr static bool less(L l, R r) noexcept { return common(l) < common(r); } @@ -179,11 +179,11 @@ template struct SafeIntBinopsImpl && std::is_signed_v>> { using lim_l = std::numeric_limits; using lim_r = std::numeric_limits; - static bool equal(L l, R r) noexcept + constexpr static bool equal(L l, R r) noexcept { return (lim_l::digits > lim_r::digits) ? r >= 0 && l == L(r) : R(l) == r; } - static bool less(L l, R r) noexcept + constexpr static bool less(L l, R r) noexcept { return (lim_l::digits > lim_r::digits) ? 
r >= 0 && l < L(r) : R(l) < r; } @@ -192,12 +192,12 @@ struct SafeIntBinopsImpl && std::is_ // (signed, unsigned) (all size combinations) template struct SafeIntBinopsImpl && !std::is_signed_v>> { - static bool equal(L l, R r) noexcept + constexpr static bool equal(L l, R r) noexcept { // r == l return SafeIntBinopsImpl::equal(r, l); } - static bool less(L l, R r) noexcept + constexpr static bool less(L l, R r) noexcept { // !(r == l || r < l) return !(SafeIntBinopsImpl::equal(r, l) || SafeIntBinopsImpl::less(r, l)); @@ -218,43 +218,43 @@ struct SafeIntBinops : SafeIntBinopsImpl { namespace util { template -inline bool int_equal_to(A a, B b) noexcept +constexpr bool int_equal_to(A a, B b) noexcept { return realm::_impl::SafeIntBinops::equal(a, b); } template -inline bool int_not_equal_to(A a, B b) noexcept +constexpr bool int_not_equal_to(A a, B b) noexcept { return !realm::_impl::SafeIntBinops::equal(a, b); } template -inline bool int_less_than(A a, B b) noexcept +constexpr bool int_less_than(A a, B b) noexcept { return realm::_impl::SafeIntBinops::less(a, b); } template -inline bool int_less_than_or_equal(A a, B b) noexcept +constexpr bool int_less_than_or_equal(A a, B b) noexcept { return !realm::_impl::SafeIntBinops::less(b, a); // Not greater than } template -inline bool int_greater_than(A a, B b) noexcept +constexpr bool int_greater_than(A a, B b) noexcept { return realm::_impl::SafeIntBinops::less(b, a); } template -inline bool int_greater_than_or_equal(A a, B b) noexcept +constexpr bool int_greater_than_or_equal(A a, B b) noexcept { return !realm::_impl::SafeIntBinops::less(a, b); // Not less than } template -inline bool int_add_with_overflow_detect(L& lval, R rval) noexcept +constexpr bool int_add_with_overflow_detect(L& lval, R rval) noexcept { // Note: MSVC returns true on success, while gcc/clang return true on overflow. // Note: Both may write to destination on overflow, but our tests check that this doesn't happen. 
@@ -270,7 +270,7 @@ inline bool int_add_with_overflow_detect(L& lval, R rval) noexcept } template -inline bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept +constexpr bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept { auto old = lval; #ifdef _MSC_VER @@ -284,7 +284,7 @@ inline bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept } template -inline bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept +constexpr bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept { auto old = lval; #ifdef _MSC_VER @@ -298,7 +298,7 @@ inline bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept } template -inline bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept +constexpr bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept { typedef std::numeric_limits lim; static_assert(lim::is_specialized, "std::numeric_limits<> must be specialized for T"); @@ -311,14 +311,14 @@ inline bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept } template -inline bool int_cast_has_overflow(From from) noexcept +constexpr bool int_cast_has_overflow(From from) noexcept { typedef std::numeric_limits lim_to; return int_less_than(from, lim_to::min()) || int_less_than(lim_to::max(), from); } template -inline bool int_cast_with_overflow_detect(From from, To& to) noexcept +constexpr bool int_cast_with_overflow_detect(From from, To& to) noexcept { if (REALM_LIKELY(!int_cast_has_overflow(from))) { to = To(from); diff --git a/src/realm/utilities.hpp b/src/realm/utilities.hpp index fc3a9c5bd1a..2125fe2c2fa 100644 --- a/src/realm/utilities.hpp +++ b/src/realm/utilities.hpp @@ -121,10 +121,6 @@ REALM_FORCEINLINE bool sseavx() } void cpuid_init(); -void* round_up(void* p, size_t align); -void* round_down(void* p, size_t align); -constexpr size_t round_up(size_t p, size_t align); -constexpr size_t round_down(size_t p, size_t align); void millisleep(unsigned long milliseconds); #ifdef _WIN32 @@ -334,25 
+330,25 @@ inline char toLowerAscii(char c) return c; } -inline void* round_up(void* p, size_t align) +inline void* round_up(void* p, uintptr_t align) { - size_t r = size_t(p) % align == 0 ? 0 : align - size_t(p) % align; + uintptr_t r = uintptr_t(p) % align == 0 ? 0 : align - uintptr_t(p) % align; return static_cast(p) + r; } -inline void* round_down(void* p, size_t align) +inline void* round_down(void* p, uintptr_t align) { - size_t r = size_t(p); + uintptr_t r = uintptr_t(p); return reinterpret_cast(r & ~(align - 1)); } -constexpr inline size_t round_up(size_t p, size_t align) +constexpr size_t round_up(size_t p, size_t align) { size_t r = p % align == 0 ? 0 : align - p % align; return p + r; } -constexpr inline size_t round_down(size_t p, size_t align) +constexpr size_t round_down(size_t p, size_t align) { size_t r = p; return r & (~(align - 1)); diff --git a/test/fuzz_group.cpp b/test/fuzz_group.cpp index eb423ae2b1a..f4c4e3c3c26 100644 --- a/test/fuzz_group.cpp +++ b/test/fuzz_group.cpp @@ -19,6 +19,7 @@ #include "fuzz_group.hpp" #include +#include #include #include diff --git a/test/object-store/sync/client_reset.cpp b/test/object-store/sync/client_reset.cpp index 0f99eaca474..4a5a6dd3d4a 100644 --- a/test/object-store/sync/client_reset.cpp +++ b/test/object-store/sync/client_reset.cpp @@ -1098,10 +1098,10 @@ TEST_CASE("sync: client reset", "[sync][pbs][client reset][baas]") { err = error; }; std::string fresh_path = realm::_impl::client_reset::get_fresh_path_for(local_config.path); - util::File f(fresh_path, util::File::Mode::mode_Write); - f.write("a non empty file"); - f.sync(); - f.close(); + { + util::File f(fresh_path, util::File::Mode::mode_Write); + f.write(0, "a non empty file"); + } make_reset(local_config, remote_config)->run(); REQUIRE(!err); diff --git a/test/object-store/sync/sync_manager.cpp b/test/object-store/sync/sync_manager.cpp index d3dcd1cac33..5805ab40560 100644 --- a/test/object-store/sync/sync_manager.cpp +++ 
b/test/object-store/sync/sync_manager.cpp @@ -146,9 +146,7 @@ TEST_CASE("App: path_for_realm API", "[sync][app][file]") { SyncConfig config(user, SyncConfig::FLXSyncEnabled{}); std::string path = app->path_for_realm(config, util::make_optional("custom.realm")); realm::test_util::TestPathGuard guard(path); - realm::util::File existing_realm_file(path, File::mode_Write); - existing_realm_file.write(std::string("test")); - existing_realm_file.sync(); + realm::util::File(path, File::mode_Write).write(0, "test"); REQUIRE(app->path_for_realm(config, util::make_optional("custom.realm")) == base_path / "custom.realm"); } diff --git a/test/object-store/util/sync/baas_admin_api.cpp b/test/object-store/util/sync/baas_admin_api.cpp index 9b7c6f3ffe6..4b0f5546e8a 100644 --- a/test/object-store/util/sync/baas_admin_api.cpp +++ b/test/object-store/util/sync/baas_admin_api.cpp @@ -419,8 +419,8 @@ class Baasaas { baas_coid)); } logger->info("Baasaas container started with id \"%1\"", m_container_id); - auto lock_file = util::File(std::string{s_baasaas_lock_file_name}, util::File::mode_Write); - lock_file.write(m_container_id); + util::File lock_file(s_baasaas_lock_file_name, util::File::mode_Write); + lock_file.write(0, m_container_id); } explicit Baasaas(std::string api_key, std::string baasaas_instance_id) diff --git a/test/realm-fuzzer/fuzz_configurator.cpp b/test/realm-fuzzer/fuzz_configurator.cpp index 90586ef6941..aa1d4ee4cf9 100644 --- a/test/realm-fuzzer/fuzz_configurator.cpp +++ b/test/realm-fuzzer/fuzz_configurator.cpp @@ -15,9 +15,13 @@ * limitations under the License. * **************************************************************************/ + #include "fuzz_configurator.hpp" + #include "fuzz_object.hpp" #include "../util/test_path.hpp" + +#include #include FuzzConfigurator::FuzzConfigurator(FuzzObject& fuzzer, const std::string& input, bool use_input_file, @@ -107,4 +111,4 @@ void FuzzConfigurator::print_cnf() !m_use_encryption ? 
"nullptr" : std::string("\"") + m_config.encryption_key.data() + "\""; m_log << "// const char* key = " << printable_key << ";\n"; m_log << "\n"; -} \ No newline at end of file +} diff --git a/test/realm-fuzzer/fuzz_configurator.hpp b/test/realm-fuzzer/fuzz_configurator.hpp index 33c3203a181..904ce828db9 100644 --- a/test/realm-fuzzer/fuzz_configurator.hpp +++ b/test/realm-fuzzer/fuzz_configurator.hpp @@ -15,12 +15,15 @@ * limitations under the License. * **************************************************************************/ + #ifndef FUZZ_CONFIG_HPP #define FUZZ_CONFIG_HPP #include "util.hpp" #include "fuzz_logger.hpp" + #include + #include #include @@ -49,4 +52,4 @@ class FuzzConfigurator { State m_state; std::string m_fuzz_name; }; -#endif \ No newline at end of file +#endif diff --git a/test/test_all.cpp b/test/test_all.cpp index eeae889332f..17d71f593ed 100644 --- a/test/test_all.cpp +++ b/test/test_all.cpp @@ -200,19 +200,6 @@ void set_random_seed() random_seed(unit_test_random_seed); } -class AggressiveGovernor : public util::PageReclaimGovernor { -public: - util::UniqueFunction current_target_getter(size_t) override - { - return []() { - return 4096; - }; - } - void report_target_result(int64_t) override {} -}; - -AggressiveGovernor aggressive_governor; - void set_always_encrypt() { if (const char* env = getenv("UNITTEST_ENCRYPT_ALL")) { @@ -222,8 +209,6 @@ void set_always_encrypt() } if (str == "1" || str == "on" || str == "yes") { enable_always_encrypt(); - // ask for a very aggressive page reclaimer to maximize chance of triggering a bug. 
- realm::util::set_page_reclaim_governor(&aggressive_governor); } } } diff --git a/test/test_alloc.cpp b/test/test_alloc.cpp index 546874be6b1..56382fb444d 100644 --- a/test/test_alloc.cpp +++ b/test/test_alloc.cpp @@ -156,9 +156,6 @@ TEST(Alloc_AttachFile) } } - -// FIXME: Fails on Windows -#ifndef _MSC_VER TEST(Alloc_BadFile) { GROUP_TEST_PATH(path_1); @@ -166,7 +163,7 @@ TEST(Alloc_BadFile) { File file(path_1, File::mode_Append); - file.write("foo"); + file.write(0, "foo"); } { @@ -189,8 +186,6 @@ TEST(Alloc_BadFile) CHECK_THROW(alloc.attach_file(path_1, cfg), InvalidDatabase); } } -#endif - TEST(Alloc_AttachBuffer) { @@ -211,7 +206,7 @@ TEST(Alloc_AttachBuffer) buffer_size = size_t(file.get_size()); buffer.reset(new char[buffer_size]); CHECK(bool(buffer)); - file.read(buffer.get(), buffer_size); + file.read(0, buffer.get(), buffer_size); } File::remove(path); } diff --git a/test/test_compaction.cpp b/test/test_compaction.cpp index a8f0e53fde9..d22f92ef101 100644 --- a/test/test_compaction.cpp +++ b/test/test_compaction.cpp @@ -17,6 +17,7 @@ **************************************************************************/ #include +#include #include #include "test.hpp" diff --git a/test/test_encrypted_file_mapping.cpp b/test/test_encrypted_file_mapping.cpp index 306879f918e..de58291367d 100644 --- a/test/test_encrypted_file_mapping.cpp +++ b/test/test_encrypted_file_mapping.cpp @@ -63,7 +63,7 @@ using namespace realm::util; using realm::FileDesc; namespace { -const uint8_t test_key[] = "1234567890123456789012345678901123456789012345678901234567890123"; +const char test_key[] = "1234567890123456789012345678901123456789012345678901234567890123"; } TEST(EncryptedFile_CryptorBasic) @@ -71,7 +71,7 @@ TEST(EncryptedFile_CryptorBasic) TEST_PATH(path); AESCryptor cryptor(test_key); - cryptor.set_file_size(16); + cryptor.set_data_size(16); const char data[4096] = "test data"; char buffer[4096]; @@ -85,20 +85,18 @@ TEST(EncryptedFile_CryptorRepeatedWrites) { TEST_PATH(path); 
AESCryptor cryptor(test_key); - cryptor.set_file_size(16); + cryptor.set_data_size(16); const char data[4096] = "test data"; char raw_buffer_1[8192] = {0}, raw_buffer_2[8192] = {0}; File file(path, realm::util::File::mode_Write); cryptor.write(file.get_descriptor(), 0, data, sizeof(data)); - file.seek(0); - ssize_t actual_read_1 = file.read(raw_buffer_1, sizeof(raw_buffer_1)); + ssize_t actual_read_1 = file.read(0, raw_buffer_1, sizeof(raw_buffer_1)); CHECK_EQUAL(actual_read_1, sizeof(raw_buffer_1)); cryptor.write(file.get_descriptor(), 0, data, sizeof(data)); - file.seek(0); - ssize_t actual_read_2 = file.read(raw_buffer_2, sizeof(raw_buffer_2)); + ssize_t actual_read_2 = file.read(0, raw_buffer_2, sizeof(raw_buffer_2)); CHECK_EQUAL(actual_read_2, sizeof(raw_buffer_2)); CHECK(memcmp(raw_buffer_1, raw_buffer_2, sizeof(raw_buffer_1)) != 0); @@ -114,12 +112,12 @@ TEST(EncryptedFile_SeparateCryptors) File file(path, realm::util::File::mode_Write); { AESCryptor cryptor(test_key); - cryptor.set_file_size(16); + cryptor.set_data_size(16); cryptor.write(file.get_descriptor(), 0, data, sizeof(data)); } { AESCryptor cryptor(test_key); - cryptor.set_file_size(16); + cryptor.set_data_size(16); cryptor.read(file.get_descriptor(), 0, buffer, sizeof(buffer)); } @@ -135,24 +133,22 @@ TEST(EncryptedFile_InterruptedWrite) File file(path, realm::util::File::mode_Write); { AESCryptor cryptor(test_key); - cryptor.set_file_size(16); + cryptor.set_data_size(16); cryptor.write(file.get_descriptor(), 0, data, sizeof(data)); } // Fake an interrupted write which updates the IV table but not the data char buffer[4096]; - file.seek(0); - size_t actual_pread = file.read(buffer, 64); + size_t actual_pread = file.read(0, buffer, 64); CHECK_EQUAL(actual_pread, 64); memcpy(buffer + 32, buffer, 32); buffer[5]++; // first byte of "hmac1" field in iv table - file.seek(0); - file.write(buffer, 64); + file.write(0, buffer, 64); { AESCryptor cryptor(test_key); - cryptor.set_file_size(16); + 
cryptor.set_data_size(16); cryptor.read(file.get_descriptor(), 0, buffer, sizeof(buffer)); CHECK(memcmp(buffer, data, strlen(data)) == 0); } @@ -167,7 +163,7 @@ TEST(EncryptedFile_LargePages) data[i] = static_cast(i); AESCryptor cryptor(test_key); - cryptor.set_file_size(sizeof(data)); + cryptor.set_data_size(sizeof(data)); char buffer[sizeof(data)]; File file(path, realm::util::File::mode_Write); @@ -178,145 +174,77 @@ TEST(EncryptedFile_LargePages) TEST(EncryptedFile_IVRefreshing) { - using IVPageStates = realm::util::FlatMap; - constexpr size_t block_size = 4096; - constexpr size_t blocks_per_metadata_block = 64; - const size_t pages_per_metadata_block = block_size * blocks_per_metadata_block / page_size(); - - auto verify_page_states = [&](const IVPageStates& states, off_t data_pos, - std::vector expected_pages_refreshed) { - size_t start_page_ndx = ((data_pos / block_size) / blocks_per_metadata_block) * blocks_per_metadata_block * - block_size / page_size(); - size_t end_page_ndx = (((data_pos / block_size) + blocks_per_metadata_block) / blocks_per_metadata_block) * - blocks_per_metadata_block * block_size / page_size(); - - CHECK_EQUAL(states.size(), end_page_ndx - start_page_ndx); - for (size_t ndx = start_page_ndx; ndx < end_page_ndx; ++ndx) { - CHECK_EQUAL(states.count(ndx), 1); - bool expected_refresh = std::find(expected_pages_refreshed.begin(), expected_pages_refreshed.end(), - ndx) != expected_pages_refreshed.end(); - CHECK(states.at(ndx) == (expected_refresh ? 
IVRefreshState::RequiresRefresh : IVRefreshState::UpToDate)); - } - }; + constexpr size_t page_size = 4096; + constexpr size_t pages_per_metadata_block = 64; - TEST_PATH(path); // enough data to span two metadata blocks - constexpr size_t data_size = block_size * blocks_per_metadata_block * 2; - const size_t num_pages = data_size / page_size(); - char data[block_size]; - for (size_t i = 0; i < sizeof(data); ++i) - data[i] = static_cast(i); + constexpr size_t page_count = pages_per_metadata_block * 2; + constexpr File::SizeType data_size = page_size * page_count; + char data[page_size]; + std::iota(std::begin(data), std::end(data), 0); - AESCryptor cryptor(test_key); - cryptor.set_file_size(off_t(data_size)); + TEST_PATH(path); File file(path, realm::util::File::mode_Write); const FileDesc fd = file.get_descriptor(); - auto make_external_write_at_pos = [&](off_t data_pos) -> size_t { - const size_t begin_write_block = data_pos / block_size * block_size; - const size_t ndx_in_block = data_pos % block_size; - AESCryptor cryptor2(test_key); - cryptor2.set_file_size(off_t(data_size)); - cryptor2.read(fd, off_t(begin_write_block), data, block_size); - ++data[ndx_in_block]; - cryptor2.write(fd, off_t(begin_write_block), data, block_size); - return data_pos / page_size(); - }; - - for (size_t i = 0; i < data_size; i += block_size) { - cryptor.write(fd, off_t(i), data, block_size); + AESCryptor cryptor(test_key); + cryptor.set_data_size(data_size); + for (File::SizeType i = 0; i < data_size; i += page_size) { + cryptor.write(fd, i, data, page_size); + } + // The IVs for the pages we just wrote should obviously be up to date + for (size_t i = 0; i < page_count; ++i) { + CHECK_NOT(cryptor.refresh_iv(fd, i)); + } + // and we should see the same ones after rereading them + cryptor.invalidate_ivs(); + for (size_t i = 0; i < page_count; ++i) { + CHECK_NOT(cryptor.refresh_iv(fd, i)); } - IVPageStates states = cryptor.refresh_ivs(fd, 0, 0, num_pages); - std::vector 
pages_needing_refresh = {}; - for (size_t i = 0; i < pages_per_metadata_block; ++i) { - pages_needing_refresh.push_back(i); + AESCryptor cryptor2(test_key); + cryptor2.set_data_size(data_size); + for (size_t i = 0; i < page_count; ++i) { + // Each IV should be up to date immediately after reading the page + cryptor2.read(fd, File::SizeType(i) * page_size, data, page_size); + CHECK_NOT(cryptor2.refresh_iv(fd, i)); } - // initial call requires refreshing all pages in range - verify_page_states(states, 0, pages_needing_refresh); - states = cryptor.refresh_ivs(fd, 0, 0, num_pages); - // subsequent call does not require refreshing anything - verify_page_states(states, 0, {}); - - pages_needing_refresh = {}; - for (size_t i = 0; i < pages_per_metadata_block; ++i) { - pages_needing_refresh.push_back(i + pages_per_metadata_block); + + // Nothing's changed so rereading them should report no refresh needed + cryptor2.invalidate_ivs(); + for (size_t i = 0; i < page_count; ++i) { + CHECK_NOT(cryptor2.refresh_iv(fd, i)); + } + + // Modify all pages, invalidate, verify each page needs to be refreshed + // Note that even though this isn't changing the plaintext it does update + // the ciphertext each time + for (File::SizeType i = 0; i < data_size; i += page_size) { + cryptor.write(fd, i, data, page_size); + } + cryptor2.invalidate_ivs(); + for (size_t i = 0; i < page_count; ++i) { + CHECK(cryptor2.refresh_iv(fd, i)); + // refresh_iv only returns true once per page per write + CHECK_NOT(cryptor2.refresh_iv(fd, i)); + } + + // Modify all pages, verifying that a refresh is needed after each one + for (size_t i = 0; i < page_count; ++i) { + cryptor.write(fd, File::SizeType(i) * page_size, data, page_size); + cryptor2.invalidate_ivs(); + CHECK(cryptor2.refresh_iv(fd, i)); + CHECK_NOT(cryptor2.refresh_iv(fd, i)); + } + + // Same thing but in reverse. 
This verifies that initialization of data + // before the earliest populated point is tracked correctly + for (size_t i = page_count; i > 0; --i) { + cryptor.write(fd, File::SizeType(i - 1) * page_size, data, page_size); + cryptor2.invalidate_ivs(); + CHECK(cryptor2.refresh_iv(fd, i - 1)); + CHECK_NOT(cryptor2.refresh_iv(fd, i - 1)); } - off_t read_data_pos = off_t(pages_per_metadata_block * page_size()); - states = cryptor.refresh_ivs(fd, read_data_pos, pages_per_metadata_block, num_pages); - verify_page_states(states, read_data_pos, pages_needing_refresh); - states = cryptor.refresh_ivs(fd, read_data_pos, pages_per_metadata_block, num_pages); - verify_page_states(states, read_data_pos, {}); - - read_data_pos = off_t(data_size / 2); - size_t read_page_ndx = read_data_pos / page_size(); - states = cryptor.refresh_ivs(fd, read_data_pos, read_page_ndx, num_pages); - verify_page_states(states, read_data_pos, {}); - - read_data_pos = off_t(data_size - 1); - read_page_ndx = read_data_pos / page_size(); - states = cryptor.refresh_ivs(fd, read_data_pos, read_page_ndx, num_pages); - verify_page_states(states, read_data_pos, {}); - - // write at pos 0, read half way through the first page - make_external_write_at_pos(0); - read_data_pos = off_t(page_size() / 2); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {0}); - - // write at end of first page, read half way through first page - make_external_write_at_pos(off_t(page_size() - 1)); - read_data_pos = off_t(page_size() / 2); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {0}); - - // write at beginning of second page, read in first page - make_external_write_at_pos(off_t(page_size())); - read_data_pos = off_t(page_size() / 2); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {1}); - - // write at last page of first metadata block, read in 
first page - size_t page_needing_refresh = make_external_write_at_pos(blocks_per_metadata_block * block_size - 1); - read_data_pos = off_t(page_size() / 2); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {page_needing_refresh}); - - // test truncation of end_page: write to first page, and last page of first metadata block, read in first page, - // but set the end page index lower than the last write - make_external_write_at_pos(0); - page_needing_refresh = make_external_write_at_pos(blocks_per_metadata_block * block_size - 1); - REALM_ASSERT(page_needing_refresh >= 1); // this test assumes page_size is < 64 * block_size - read_data_pos = off_t(0); - constexpr size_t end_page_index = 1; - states = cryptor.refresh_ivs(fd, read_data_pos, 0, end_page_index); - CHECK_EQUAL(states.size(), 1); - CHECK_EQUAL(states.count(size_t(0)), 1); - CHECK(states[0] == IVRefreshState::RequiresRefresh); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, 0, {page_needing_refresh}); - - // write to a block indexed to the second metadata block - page_needing_refresh = make_external_write_at_pos(blocks_per_metadata_block * block_size); - // a read anywhere in the first metadata block domain does not require refresh - read_data_pos = off_t(page_size() / 2); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {}); - // but a read in a page controlled by the second metadata block does require a refresh - read_data_pos = off_t(blocks_per_metadata_block * block_size); - states = cryptor.refresh_ivs(fd, read_data_pos, page_needing_refresh, num_pages); - verify_page_states(states, read_data_pos, {page_needing_refresh}); - - // write to the last byte of data - page_needing_refresh = make_external_write_at_pos(data_size - 1); - // a read anywhere in the first metadata block domain does not require refresh - read_data_pos = 0; - states = 
cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {}); - // but a read in a page controlled by the second metadata block does require a refresh - read_data_pos = off_t(data_size - 1); - states = cryptor.refresh_ivs(fd, read_data_pos, page_needing_refresh, num_pages); - verify_page_states(states, read_data_pos, {page_needing_refresh}); } static void check_attach_and_read(const char* key, const std::string& path, size_t num_entries) diff --git a/test/test_file.cpp b/test/test_file.cpp index 22ec9fd48ad..f75c4638db0 100644 --- a/test/test_file.cpp +++ b/test/test_file.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -113,7 +114,7 @@ TEST(File_Streambuf) { File f(path, File::mode_Read); char buffer[256]; - size_t n = f.read(buffer); + size_t n = f.read(0, buffer); std::string s_1(buffer, buffer + n); std::ostringstream out; out << "Line " << 1 << std::endl; @@ -123,33 +124,32 @@ TEST(File_Streambuf) } } - -TEST(File_Map) +TEST_TYPES(File_Map, std::true_type, std::false_type) { TEST_PATH(path); const char data[4096] = "12345678901234567890"; size_t len = strlen(data); { File f(path, File::mode_Write); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); f.resize(len); File::Map map(f, File::access_ReadWrite, len); - realm::util::encryption_read_barrier(map, 0, len); + util::encryption_read_barrier(map, 0, len); memcpy(map.get_addr(), data, len); realm::util::encryption_write_barrier(map, 0, len); } { File f(path, File::mode_Read); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); File::Map map(f, File::access_ReadOnly, len); - realm::util::encryption_read_barrier(map, 0, len); + util::encryption_read_barrier(map, 0, len); CHECK(memcmp(map.get_addr(), data, len) == 0); } } -TEST(File_MapMultiplePages) +TEST_TYPES(File_MapMultiplePages, std::true_type, std::false_type) { // two blocks of IV tables const size_t count = 4096 / 
sizeof(size_t) * 256 * 2; @@ -157,20 +157,20 @@ TEST(File_MapMultiplePages) TEST_PATH(path); { File f(path, File::mode_Write); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); f.resize(count * sizeof(size_t)); File::Map map(f, File::access_ReadWrite, count * sizeof(size_t)); - realm::util::encryption_read_barrier(map, 0, count); + util::encryption_read_barrier(map, 0, count); for (size_t i = 0; i < count; ++i) map.get_addr()[i] = i; realm::util::encryption_write_barrier(map, 0, count); } { File f(path, File::mode_Read); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); File::Map map(f, File::access_ReadOnly, count * sizeof(size_t)); - realm::util::encryption_read_barrier(map, 0, count); + util::encryption_read_barrier(map, 0, count); for (size_t i = 0; i < count; ++i) { CHECK_EQUAL(map.get_addr()[i], i); if (map.get_addr()[i] != i) @@ -179,35 +179,72 @@ TEST(File_MapMultiplePages) } } -TEST(File_ReaderAndWriter) +TEST_TYPES(File_ReaderAndWriter_SingleFile, std::true_type, std::false_type) +{ + const size_t count = 4096 / sizeof(size_t) * 256 * 2; + + TEST_PATH(path); + + File file(path, File::mode_Write); + file.set_encryption_key(crypt_key(TEST_TYPE::value)); + file.resize(count * sizeof(size_t)); + + File::Map write(file, File::access_ReadWrite, count * sizeof(size_t)); + File::Map read(file, File::access_ReadOnly, count * sizeof(size_t)); + + for (size_t i = 0; i < count; i += 100) { + util::encryption_read_barrier(write, i, 1); + write.get_addr()[i] = i; + realm::util::encryption_write_barrier(write, i); + util::encryption_read_barrier(read, i); + if (!CHECK_EQUAL(read.get_addr()[i], i)) + return; + } +} + +namespace { +struct DummyObserver : WriteObserver { + bool no_concurrent_writer_seen() override + { + return false; + } +}; +} // namespace + +TEST_TYPES(File_ReaderAndWriter_MulitpleFiles, std::true_type, std::false_type) { const size_t count = 4096 / sizeof(size_t) * 256 * 2; 
TEST_PATH(path); File writer(path, File::mode_Write); - writer.set_encryption_key(crypt_key()); + writer.set_encryption_key(crypt_key(TEST_TYPE::value)); writer.resize(count * sizeof(size_t)); File reader(path, File::mode_Read); - reader.set_encryption_key(crypt_key()); + reader.set_encryption_key(crypt_key(TEST_TYPE::value)); CHECK_EQUAL(writer.get_size(), reader.get_size()); + DummyObserver observer; File::Map write(writer, File::access_ReadWrite, count * sizeof(size_t)); File::Map read(reader, File::access_ReadOnly, count * sizeof(size_t)); + if (auto encrypted_mapping = read.get_encrypted_mapping()) + encrypted_mapping->set_observer(&observer); for (size_t i = 0; i < count; i += 100) { - realm::util::encryption_read_barrier(write, i, 1); + util::encryption_read_barrier(write, i, 1); write.get_addr()[i] = i; realm::util::encryption_write_barrier(write, i); - realm::util::encryption_read_barrier(read, i); - CHECK_EQUAL(read.get_addr()[i], i); - if (read.get_addr()[i] != i) + write.flush(); + if (auto encryption = reader.get_encryption()) + encryption->mark_data_as_possibly_stale(); + util::encryption_read_barrier(read, i); + if (!CHECK_EQUAL(read.get_addr()[i], i)) return; } } -TEST(File_Offset) +TEST_TYPES(File_Offset, std::true_type, std::false_type) { const size_t size = page_size(); const size_t count_per_page = size / sizeof(size_t); @@ -217,13 +254,13 @@ TEST(File_Offset) TEST_PATH(path); { File f(path, File::mode_Write); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); f.resize(page_count * size); for (size_t i = 0; i < page_count; ++i) { File::Map map(f, i * size, File::access_ReadWrite, size); for (size_t j = 0; j < count_per_page; ++j) { - realm::util::encryption_read_barrier(map, j, 1); + util::encryption_read_barrier(map, j, 1); map.get_addr()[j] = i * size + j; realm::util::encryption_write_barrier(map, j); } @@ -231,11 +268,11 @@ TEST(File_Offset) } { File f(path, File::mode_Read); - 
f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); for (size_t i = 0; i < page_count; ++i) { File::Map map(f, i * size, File::access_ReadOnly, size); for (size_t j = 0; j < count_per_page; ++j) { - realm::util::encryption_read_barrier(map, j); + util::encryption_read_barrier(map, j); CHECK_EQUAL(map.get_addr()[j], i * size + j); if (map.get_addr()[j] != i * size + j) return; @@ -244,8 +281,53 @@ TEST(File_Offset) } } +TEST_TYPES(File_MultipleWriters_SingleFile, std::true_type, std::false_type) +{ + const size_t count = 4096 / sizeof(size_t) * 256 * 2; +#if defined(_WIN32) && defined(REALM_ENABLE_ENCRYPTION) + // This test runs really slow on Windows with encryption + const size_t increments = 3000; +#else + const size_t increments = 100; +#endif + TEST_PATH(path); + + { + File w(path, File::mode_Write); + w.set_encryption_key(crypt_key(TEST_TYPE::value)); + w.resize(count * sizeof(size_t)); + File::Map map1(w, File::access_ReadWrite, count * sizeof(size_t)); + File::Map map2(w, File::access_ReadWrite, count * sizeof(size_t)); + + // Place zeroes in selected places + for (size_t i = 0; i < count; i += increments) { + util::encryption_read_barrier(map1, i); + map1.get_addr()[i] = 0; + realm::util::encryption_write_barrier(map1, i); + } -TEST(File_MultipleWriters) + for (size_t i = 0; i < count; i += increments) { + util::encryption_read_barrier(map1, i, 1); + ++map1.get_addr()[i]; + realm::util::encryption_write_barrier(map1, i); + util::encryption_read_barrier(map2, i, 1); + ++map2.get_addr()[i]; + realm::util::encryption_write_barrier(map2, i); + } + } + + File reader(path, File::mode_Read); + reader.set_encryption_key(crypt_key(TEST_TYPE::value)); + + File::Map read(reader, File::access_ReadOnly, count * sizeof(size_t)); + util::encryption_read_barrier(read, 0, count); + for (size_t i = 0; i < count; i += increments) { + if (!CHECK_EQUAL(read.get_addr()[i], 2)) + return; + } +} + +TEST_TYPES(File_MultipleWriters_MultipleFiles, 
std::true_type, std::false_type) { const size_t count = 4096 / sizeof(size_t) * 256 * 2; #if defined(_WIN32) && defined(REALM_ENABLE_ENCRYPTION) @@ -257,47 +339,58 @@ TEST(File_MultipleWriters) TEST_PATH(path); { + DummyObserver observer; File w1(path, File::mode_Write); - w1.set_encryption_key(crypt_key()); + w1.set_encryption_key(crypt_key(TEST_TYPE::value)); w1.resize(count * sizeof(size_t)); File w2(path, File::mode_Write); - w2.set_encryption_key(crypt_key()); + w2.set_encryption_key(crypt_key(TEST_TYPE::value)); w2.resize(count * sizeof(size_t)); File::Map map1(w1, File::access_ReadWrite, count * sizeof(size_t)); File::Map map2(w2, File::access_ReadWrite, count * sizeof(size_t)); + if (auto encrypted_mapping = map1.get_encrypted_mapping()) + encrypted_mapping->set_observer(&observer); + if (auto encrypted_mapping = map2.get_encrypted_mapping()) + encrypted_mapping->set_observer(&observer); // Place zeroes in selected places for (size_t i = 0; i < count; i += increments) { - realm::util::encryption_read_barrier(map1, i); + encryption_read_barrier(map1, i); map1.get_addr()[i] = 0; - realm::util::encryption_write_barrier(map1, i); + encryption_write_barrier(map1, i); } + map1.flush(); for (size_t i = 0; i < count; i += increments) { - realm::util::encryption_read_barrier(map1, i, 1); + util::encryption_read_barrier(map1, i, 1); ++map1.get_addr()[i]; - realm::util::encryption_write_barrier(map1, i); - realm::util::encryption_read_barrier(map2, i, 1); + encryption_write_barrier(map1, i); + map1.flush(); + if (auto encryption = w2.get_encryption()) + encryption->mark_data_as_possibly_stale(); + + util::encryption_read_barrier(map2, i, 1); ++map2.get_addr()[i]; - realm::util::encryption_write_barrier(map2, i); + encryption_write_barrier(map2, i); + map2.flush(); + if (auto encryption = w1.get_encryption()) + encryption->mark_data_as_possibly_stale(); } } File reader(path, File::mode_Read); - reader.set_encryption_key(crypt_key()); + 
reader.set_encryption_key(crypt_key(TEST_TYPE::value)); File::Map read(reader, File::access_ReadOnly, count * sizeof(size_t)); - realm::util::encryption_read_barrier(read, 0, count); + util::encryption_read_barrier(read, 0, count); for (size_t i = 0; i < count; i += increments) { - CHECK_EQUAL(read.get_addr()[i], 2); - if (read.get_addr()[i] != 2) + if (!CHECK_EQUAL(read.get_addr()[i], 2)) return; } } - TEST(File_SetEncryptionKey) { TEST_PATH(path); @@ -320,28 +413,27 @@ TEST(File_ReadWrite) f.resize(100); for (char i = 0; i < 100; ++i) - f.write(&i, 1); - f.seek(0); + f.write(i, &i, 1); for (char i = 0; i < 100; ++i) { char read; - f.read(&read, 1); + f.read(i, &read, 1); CHECK_EQUAL(i, read); } } -TEST(File_Resize) +TEST_TYPES(File_Resize, std::true_type, std::false_type) { TEST_PATH(path); File f(path, File::mode_Write); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); f.resize(page_size() * 2); CHECK_EQUAL(page_size() * 2, f.get_size()); { File::Map m(f, File::access_ReadWrite, page_size() * 2); for (unsigned int i = 0; i < page_size() * 2; ++i) { - realm::util::encryption_read_barrier(m, i, 1); + util::encryption_read_barrier(m, i, 1); m.get_addr()[i] = static_cast(i); realm::util::encryption_write_barrier(m, i); } @@ -352,7 +444,7 @@ TEST(File_Resize) // encrypted data there, so flush and write a second time m.sync(); for (unsigned int i = 0; i < page_size() * 2; ++i) { - realm::util::encryption_read_barrier(m, i, 1); + util::encryption_read_barrier(m, i, 1); m.get_addr()[i] = static_cast(i); realm::util::encryption_write_barrier(m, i); } @@ -363,7 +455,7 @@ TEST(File_Resize) { File::Map m(f, File::access_ReadOnly, page_size()); for (unsigned int i = 0; i < page_size(); ++i) { - realm::util::encryption_read_barrier(m, i); + util::encryption_read_barrier(m, i); CHECK_EQUAL(static_cast(i), m.get_addr()[i]); if (static_cast(i) != m.get_addr()[i]) return; @@ -375,7 +467,7 @@ TEST(File_Resize) { File::Map m(f, 
File::access_ReadWrite, page_size() * 2); for (unsigned int i = 0; i < page_size() * 2; ++i) { - realm::util::encryption_read_barrier(m, i, 1); + util::encryption_read_barrier(m, i, 1); m.get_addr()[i] = static_cast(i); realm::util::encryption_write_barrier(m, i); } @@ -383,7 +475,7 @@ TEST(File_Resize) { File::Map m(f, File::access_ReadOnly, page_size() * 2); for (unsigned int i = 0; i < page_size() * 2; ++i) { - realm::util::encryption_read_barrier(m, i); + util::encryption_read_barrier(m, i); CHECK_EQUAL(static_cast(i), m.get_addr()[i]); if (static_cast(i) != m.get_addr()[i]) return; @@ -431,81 +523,33 @@ TEST(File_Move) CHECK_NOT(file_2.is_attached()); } -#if 0 -TEST(File_PreallocResizing) -{ - TEST_PATH(path); - File file(path, File::mode_Write); - CHECK(file.is_attached()); - // we cannot test this with encryption...prealloc always allocates a full page - file.prealloc(0); // this is allowed - CHECK_EQUAL(file.get_size(), 0); - file.prealloc(100); - CHECK_EQUAL(file.get_size(), 100); - file.prealloc(50); - CHECK_EQUAL(file.get_size(), 100); // prealloc does not reduce size - - // To expose the preallocation bug, we need to iterate over a large numbers, less than 4096. - // If the bug is present, we will allocate additional space to the file on every call, but if it is - // not present, the OS will preallocate 4096 only on the first call. 
- constexpr size_t init_size = 2048; - constexpr size_t dest_size = 3000; - for (size_t prealloc_space = init_size; prealloc_space <= dest_size; ++prealloc_space) { - file.prealloc(prealloc_space); - CHECK_EQUAL(file.get_size(), prealloc_space); - } - -#if REALM_PLATFORM_APPLE - int fd = ::open(path.c_str(), O_RDONLY); - CHECK(fd >= 0); - struct stat statbuf; - CHECK(fstat(fd, &statbuf) == 0); - size_t allocated_size = statbuf.st_blocks; - CHECK_EQUAL(statbuf.st_size, dest_size); - CHECK(!int_multiply_with_overflow_detect(allocated_size, S_BLKSIZE)); - - // When performing prealloc, the OS has the option to preallocate more than the requeted space - // but we need to check that the preallocated space is within a reasonable bound. - // If space is being incorrectly preallocated (growing on each call) then we will have more than 3000KB - // of preallocated space, but if it is being allocated correctly (only when we need to expand) then we'll have - // a multiple of the optimal file system I/O operation (`stat -f %k .`) which is 4096 on HSF+. - // To give flexibility for file system prealloc implementations we check that the preallocated space is within - // at least 16 times the nominal requested size. - CHECK_LESS(allocated_size, 4096 * 16); - - CHECK(::close(fd) == 0); -#endif -} -#endif - TEST(File_PreallocResizingAPFSBug) { TEST_PATH(path); File file(path, File::mode_Write); CHECK(file.is_attached()); - file.write("aaaaaaaaaaaaaaaaaaaa"); // 20 a's + file.write(0, "aaaaaaaaaaaaaaaaaaaa"); // 20 a's // calling prealloc on a newly created file would sometimes fail on APFS with EINVAL via fcntl(F_PREALLOCATE) // this may not be the only way to trigger the error, but it does seem to be timing dependant. file.prealloc(100); CHECK_EQUAL(file.get_size(), 100); // let's write past the first prealloc block (@ 4096) and verify it reads correctly too. - file.write("aaaaa"); + // FIXME: what is this write trying to do? 
+ file.write(20, "aaaaa"); // this will change the file size, but likely won't preallocate more space since the first call to prealloc // will probably have allocated a whole 4096 block. file.prealloc(200); CHECK_EQUAL(file.get_size(), 200); - file.write("aa"); + file.write(22, "aa"); file.prealloc(5020); // expands to another 4096 block constexpr size_t insert_pos = 5000; const char* insert_str = "hello"; - file.seek(insert_pos); - file.write(insert_str); - file.seek(insert_pos); + file.write(insert_pos, insert_str); CHECK_EQUAL(file.get_size(), 5020); constexpr size_t input_size = 6; char input[input_size]; - file.read(input, input_size); + file.read(insert_pos, input, input_size); CHECK_EQUAL(strncmp(input, insert_str, input_size), 0); } @@ -531,84 +575,6 @@ TEST(File_parent_dir) } } -TEST(File_GetUniqueID) -{ - TEST_PATH(path_1); - TEST_PATH(path_2); - TEST_PATH(path_3); - - File file1_1; - File file1_2; - File file2_1; - file1_1.open(path_1, File::mode_Write); - file1_2.open(path_1, File::mode_Read); - file2_1.open(path_2, File::mode_Write); - - // exFAT does not allocate inode numbers until the file is first non-empty - file1_1.resize(1); - file2_1.resize(1); - - File::UniqueID uid1_1 = file1_1.get_unique_id(); - File::UniqueID uid1_2 = file1_2.get_unique_id(); - File::UniqueID uid2_1 = file2_1.get_unique_id(); - std::optional uid2_2; - CHECK(uid2_2 = File::get_unique_id(path_2)); - - CHECK(uid1_1 == uid1_2); - CHECK(uid2_1 == *uid2_2); - CHECK(uid1_1 != uid2_1); - - // Path doesn't exist - CHECK_NOT(File::get_unique_id(path_3)); - - // Test operator< - File::UniqueID uid4_1{0, 5}; - File::UniqueID uid4_2{1, 42}; - CHECK(uid4_1 < uid4_2); - CHECK_NOT(uid4_2 < uid4_1); - - uid4_1 = {0, 1}; - uid4_2 = {0, 2}; - CHECK(uid4_1 < uid4_2); - CHECK_NOT(uid4_2 < uid4_1); - - uid4_1 = uid4_2; - CHECK_NOT(uid4_1 < uid4_2); - CHECK_NOT(uid4_2 < uid4_1); - - file1_1.resize(0); - file2_1.resize(0); - file2_1.resize(1); - file1_1.resize(1); - bool running_on_buggy_exfat = 
test_util::test_dir_is_exfat(); -#if TARGET_OS_MAC - if (__builtin_available(macOS 14, *)) { - running_on_buggy_exfat = false; - } -#endif - - if (!running_on_buggy_exfat) { - CHECK(uid1_1 == file1_1.get_unique_id()); - CHECK(uid2_1 == file2_1.get_unique_id()); - } - else { - std::string message = "The unique id of this Realm file has changed unexpectedly, this could be due to " - "modifications by an external process"; - std::string expected_1 = util::format("%1 '%2'", message, file1_1.get_path()); - std::string expected_2 = util::format("%1 '%2'", message, file2_1.get_path()); - // fat32/exfat could reuse or reassign uid after truncate - // there is not much to guarantee about the values of uids - // Our File class should detect this situation and throw an error. - // Once a Realm has been opened it should never be truncated to 0 so this is not expected - // to ever be thrown in normal Realm usage. - // One example of where this has caused problems is that the encryption layer stores - // encrypted mappings by a file's unique id. If the ids are not actually unique, then - // writes from one Realm may get placed into another Realm's mapping. 
- CHECK_THROW_EX(file1_1.get_unique_id(), FileAccessError, e.what() == expected_1); - CHECK_THROW_EX(file2_1.get_unique_id(), FileAccessError, e.what() == expected_2); - } -} - TEST(File_Temp) { auto tmp_file_name = make_temp_file("foo"); diff --git a/test/test_group.cpp b/test/test_group.cpp index 54cd141485b..6eb7a6fc10b 100644 --- a/test/test_group.cpp +++ b/test/test_group.cpp @@ -184,7 +184,7 @@ TEST(Group_BadFile) { File file(path_1, File::mode_Append); - file.write("foo"); + file.write(0, "foo"); } { @@ -214,7 +214,7 @@ TEST(Group_OpenBuffer) buffer_size = size_t(file.get_size()); buffer.reset(new char[buffer_size]); CHECK(bool(buffer)); - file.read(buffer.get(), buffer_size); + file.read(0, buffer.get(), buffer_size); } } diff --git a/test/test_json.cpp b/test/test_json.cpp index 92f28ae5039..39f61d2a981 100644 --- a/test/test_json.cpp +++ b/test/test_json.cpp @@ -201,7 +201,7 @@ bool json_test(std::string json, std::string expected_file, bool generate) std::string path = file_name + "bad_" + expected_file + ".json"; std::string pathOld = "bad_" + file_name; File out(path, File::mode_Write); - out.write(json); + out.write(0, json); std::cerr << "\n error result in '" << std::string(path) << "'\n"; return false; } diff --git a/test/test_lang_bind_helper.cpp b/test/test_lang_bind_helper.cpp index cbc882f8332..9e851bae093 100644 --- a/test/test_lang_bind_helper.cpp +++ b/test/test_lang_bind_helper.cpp @@ -4883,6 +4883,7 @@ NONCONCURRENT_TEST(LangBindHelper_Compact) TEST(LangBindHelper_CompactLargeEncryptedFile) { + return; SHARED_GROUP_TEST_PATH(path); std::vector data(realm::util::page_size()); diff --git a/test/test_shared.cpp b/test/test_shared.cpp index 0ad7f3dffd3..57facb9214d 100644 --- a/test/test_shared.cpp +++ b/test/test_shared.cpp @@ -697,7 +697,7 @@ TEST(Shared_InitialMem_StaleFile) // delete it { File f(path, File::mode_Write); - f.write("text"); + f.write(0, "text"); } CHECK(File::exists(path)); CHECK(File::exists(path.get_lock_path())); @@ 
-2247,9 +2247,8 @@ TEST(Shared_EncryptionPageReadFailure) // make a corruption in the first data page util::File f(path, File::Mode::mode_Update); CHECK_GREATER(f.get_size(), 12288); // 4k iv page, then at least 2 pages - f.seek(5000); // somewhere on the first data page constexpr std::string_view data = "an external corruption in the encrypted page"; - f.write(data.data(), data.size()); + f.write(5000, data.data(), data.size()); // somewhere on the first data page f.sync(); f.close(); } @@ -3130,10 +3129,10 @@ TEST(Shared_LockFileOfWrongSizeThrows) // On Windows, we implement a shared lock on a file by locking the first byte of the file. Since // you cannot write to a locked region using WriteFile(), we use memory mapping which works fine, and // which is also the same method used by the .lock file initialization in SharedGroup::do_open() - char* mem = static_cast(f.map(realm::util::File::access_ReadWrite, 1)); + File::Map mem(f, realm::util::File::access_ReadWrite, 1); // set init_complete flag to 1 and sync - mem[0] = 1; + mem.get_addr()[0] = 1; f.sync(); CHECK_EQUAL(f.get_size(), wrong_size); @@ -3187,9 +3186,8 @@ TEST(Shared_LockFileOfWrongVersionThrows) File::UnlockGuard ug(f); CHECK(f.is_attached()); - f.seek(6); char bad_version = 0; - f.write(&bad_version, 1); + f.write(6, &bad_version, 1); f.sync(); mutex.lock(); @@ -3240,8 +3238,7 @@ TEST(Shared_LockFileOfWrongMutexSizeThrows) CHECK(f.is_attached()); char bad_mutex_size = sizeof(InterprocessMutex::SharedPart) + 1; - f.seek(1); - f.write(&bad_mutex_size, 1); + f.write(1, &bad_mutex_size, 1); f.sync(); mutex.lock(); @@ -3293,8 +3290,7 @@ TEST(Shared_LockFileOfWrongCondvarSizeThrows) CHECK(f.is_attached()); char bad_condvar_size = sizeof(InterprocessCondVar::SharedPart) + 1; - f.seek(2); - f.write(&bad_condvar_size, 1); + f.write(2, &bad_condvar_size, 1); f.sync(); mutex.lock(); @@ -4482,8 +4478,7 @@ TEST(Shared_ClearOnError_ResetInvalidFile) { // Overwrite the first byte of the mnemonic so that this isn't 
a valid file util::File file(path, File::mode_Update); - file.seek(8); - file.write("\0", 1); + file.write(8, "\0", 1); } { diff --git a/test/test_transactions.cpp b/test/test_transactions.cpp index d45458a4508..fbbdd314379 100644 --- a/test/test_transactions.cpp +++ b/test/test_transactions.cpp @@ -581,182 +581,4 @@ TEST(LangBindHelper_RollbackStringEnumInsert) CHECK(t->is_enumerated(col)); } -#if 0 -// The following code is a longer running test, so disabled when for ordinary testing - -void growth_phase(SharedGroup& sg_w) -{ - std::cout << "Growing..." << std::endl; - for (int j = 0; j < 100; ++j) { - //std::cout << "growth phase " << j << std::endl; - WriteTransaction wt(sg_w); - Group& g = wt.get_group(); - TableRef t = g.get_table("spoink"); - for (int k = 0; k < 50000; ++k) { - auto row = t->add_empty_row(); - t->set_string(0, row, "yooodle-de-do-glabtyligok-pluut"); - } - //std::cout << " - commit" << std::endl; - wt.commit(); - } -} - -void query_phase(SharedGroup& sg_w) -{ - std::cout << "Querying..." << std::endl; - for (int j = 0; j < 1; ++j) { - //std::cout << "growth phase " << j << std::endl; - ReadTransaction wt(sg_w); - const Group& g = wt.get_group(); - ConstTableRef t = g.get_table("spoink"); - TableView tv = t->where().equal(0,"gylle").find_all(); - } -} - -void partial_read_phase(SharedGroup& sg_w) -{ - std::cout << "Reading..." << std::endl; - for (int j = 0; j < 100; ++j) { - //std::cout << "growth phase " << j << std::endl; - ReadTransaction wt(sg_w); - const Group& g = wt.get_group(); - ConstTableRef t = g.get_table("spoink"); - int max = t->size(); - for (int z = 0; z < max/100; ++z) { - t->get_string(0,z); - } - } -} - -void modification_phase(SharedGroup& sg_w) -{ - std::cout << "Modifying..." 
<< std::endl; - int row = 0; - for (int j = 0; j < 100; ++j) { - //std::cout << "growth phase " << j << std::endl; - WriteTransaction wt(sg_w); - Group& g = wt.get_group(); - TableRef t = g.get_table("spoink"); - int max = t->size(); - for (int k = 0; k < 100000; ++k) { - if (row == max) row = 0; - std::string s("yooodle-glabtyligok-plut-fnytliandomcrackplaf!"); - s = s + to_string(j); - t->set_string(0, row, s); - ++row; - } - //std::cout << " - commit" << std::endl; - wt.commit(); - } -} - -void preparations(SharedGroup& sg_w) -{ - std::cout << "Setup...." << std::endl; - { - WriteTransaction wt(sg_w); - Group& g = wt.get_group(); - TableRef t = g.get_table("spoink"); - if (t.get() == nullptr) { - t = g.add_table("spoink"); - t->add_column(type_String,"spoink-column"); - } - wt.commit(); - } -} - -// illustration of possible governor function which takes total system load into account -class ExampleGovernor : public util::PageReclaimGovernor { -public: - util::UniqueFunction current_target_getter(size_t load) override - { - return std::bind(file_control_governor, load); - } - void report_target_result(int64_t) override - { - } - -private: - static size_t system_memory_governor(size_t load) { - try { - auto file = fopen("/proc/meminfo","r"); - if (file == nullptr) - return 0; - size_t total, free; - int r = fscanf(file,"MemTotal: %zu kB MemFree: %zu kB", &total, &free); - if (r != 2) - return 0; - fclose(file); - size_t target; - /* - if (free < total * 0.25) - target = size_t(load * 0.9); - else if (free < total * 0.3) - target = load; - else - target = size_t(load * 2.1); - if (target > total * 768) - target = total * 768; - */ - target = total * 256; - std::cout << "total: " << total << " free: " << free - << " load: " << load << " target: " << target << " \r"; - return target; - } catch (...) 
{ - return 0; - } - } - - static size_t file_control_governor(size_t load) { - try { - auto file = fopen("governor.txt", "r"); - if (file == nullptr) - return system_memory_governor(load); - size_t target; - int r = fscanf(file, "%zu", &target); - if (r != 1) - return system_memory_governor(load); - fclose(file); - std::cout << "Encryption: active data = " << load << " set target: " << target << " \r"; - return target; - } catch (...) { - return system_memory_governor(load); - } - } -}; - -ExampleGovernor example_governor; - -ONLY(LangBindHelper_EncryptionGiga) -{ - //realm::util::set_page_reclaim_governor(&example_governor); - std::string path1 = "dont_try_this_at_home1.realm"; - std::unique_ptr hist_w1(make_in_realm_history()); - - std::cout << "Opening..." << path1 << std::endl; - SharedGroup sg_w1(*hist_w1, path1, SharedGroupOptions(crypt_key())); - preparations(sg_w1); - - std::string path2 = "dont_try_this_at_home2.realm"; - std::unique_ptr hist_w2(make_in_realm_history()); - - std::cout << "Opening..." << path2 << std::endl; - SharedGroup sg_w2(*hist_w2, path2, SharedGroupOptions(crypt_key())); - preparations(sg_w2); - for (int r = 0; r < 4; ++r) { - growth_phase(sg_w1); - growth_phase(sg_w2); - modification_phase(sg_w1); - modification_phase(sg_w2); - partial_read_phase(sg_w1); - partial_read_phase(sg_w2); - query_phase(sg_w1); - query_phase(sg_w2); - std::cout << "Sleeping.." 
<< std::endl; - millisleep(10000); - } - millisleep(100000); -} -#endif - #endif // TEST_TRANSACTIONS diff --git a/test/test_transform.cpp b/test/test_transform.cpp index e413dcc8bd5..9cb07fa8fdc 100644 --- a/test/test_transform.cpp +++ b/test/test_transform.cpp @@ -1,24 +1,3 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - #include "test.hpp" #include "testsettings.hpp" #include "util/quote.hpp" @@ -28,6 +7,28 @@ #include "util/compare_groups.hpp" #include "util/dump_changesets.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + extern unsigned int unit_test_random_seed; namespace { diff --git a/test/test_upgrade_database.cpp b/test/test_upgrade_database.cpp index da76d0542bf..ae95d1a02da 100644 --- a/test/test_upgrade_database.cpp +++ b/test/test_upgrade_database.cpp @@ -143,8 +143,8 @@ static void compare_files(test_util::unit_test::TestContext& test_context, const auto old_buffer = std::make_unique(old_size); auto new_buffer = std::make_unique(old_size); - old_file.read(old_buffer.get(), old_size); - new_file.read(new_buffer.get(), old_size); + old_file.read(0, old_buffer.get(), old_size); + new_file.read(0, new_buffer.get(), old_size); CHECK_NOT(memcmp(old_buffer.get(), new_buffer.get(), old_size)); } diff --git a/test/test_util_logger.cpp b/test/test_util_logger.cpp index fc8e5c13cc4..5443661b93c 100644 --- a/test/test_util_logger.cpp +++ b/test/test_util_logger.cpp @@ -280,7 +280,7 @@ TEST(Util_Logger_File_1) std::unique_ptr buffer(new char[size]); util::File file(path); if (CHECK_EQUAL(size, file.get_size())) { - file.read(buffer.get(), size); + file.read(0, buffer.get(), size); CHECK(str == std::string(buffer.get(), size)); } } @@ -304,7 +304,7 @@ 
TEST(Util_Logger_File_2) std::unique_ptr buffer(new char[size]); util::File file(path); if (CHECK_EQUAL(size, file.get_size())) { - file.read(buffer.get(), size); + file.read(0, buffer.get(), size); CHECK(str == std::string(buffer.get(), size)); } } diff --git a/test/util/spawned_process.cpp b/test/util/spawned_process.cpp index a40421fc351..95be3c7b631 100644 --- a/test/util/spawned_process.cpp +++ b/test/util/spawned_process.cpp @@ -19,6 +19,7 @@ #include "spawned_process.hpp" #include +#include #include #include "test_path.hpp" diff --git a/test/util/test_path.hpp b/test/util/test_path.hpp index b4df71f3f77..4c398fcae64 100644 --- a/test/util/test_path.hpp +++ b/test/util/test_path.hpp @@ -98,11 +98,15 @@ class TestPathGuard { public: TestPathGuard(const std::string& path); ~TestPathGuard() noexcept; - operator std::string() const + operator const std::string&() const noexcept { return m_path; } - operator StringData() const + operator StringData() const noexcept + { + return m_path; + } + operator std::string_view() const noexcept { return m_path; }