
Clean up a bunch of old encryption cruft
The global shared cache of encrypted file maps was originally required because
we actually opened Realm files multiple times in normal usage, so each of the
open files had to know about the others to copy things around. #4839 made it so
that in normal usage we only ever have one DB instance per file per process, so
the cache became dead code. Multiprocess encryption made it unnecessary even
when the one-DB-per-process rule is violated, as the multiprocess code path
covers that case.

This eliminates our last reliance on file UniqueIDs, which lets us get rid of
the hacks related to them.

The encryption page reclaimer mostly never actually worked. It used a very
conservative reclamation rule under which pages were never reclaimed while any
long-lived Transaction existed, even a frozen or continuously refreshed one.
Long-lived Transactions are very common in practice, and when they are absent
the DB usually isn't kept open for long either, so the reclaimer was redundant.

Encryption used to rely on handling BAD_EXEC signals (or mach exceptions)
rather than explicit barriers, so it had to read and write in page-sized
chunks. That's no longer the case, so we can eliminate a lot of complexity by
always reading and writing in 4k blocks.
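For context, the explicit-barrier call-site pattern now looks roughly like
this (a sketch assembled from the translate_critical hunk below, not a new
API):

    char* addr = txl.mapping_addr + offset;
    // Announce the header read so the encryption layer can decrypt the
    // 4k blocks covering it; presumably a no-op for unencrypted files.
    util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping);
    size_t size = NodeHeader::get_byte_size_from_header(addr);
    // Widen the barrier to the whole array once its size is known.
    util::encryption_read_barrier(addr, size, txl.encrypted_mapping);

Reads are announced before the data is touched, and writes are flushed with a
matching util::encryption_write_barrier, so no fault handler is needed.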
tgoyne committed May 21, 2024
1 parent c6cb1ef commit aa3d9c0
Showing 50 changed files with 1,401 additions and 2,962 deletions.
36 changes: 14 additions & 22 deletions src/realm/alloc.cpp
@@ -119,9 +119,7 @@ char* Allocator::translate_less_critical(RefTranslation* ref_translation_ptr, re
 RefTranslation& txl = ref_translation_ptr[idx];
 size_t offset = ref - get_section_base(idx);
 char* addr = txl.mapping_addr + offset;
-#if REALM_ENABLE_ENCRYPTION
-realm::util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping, nullptr);
-#endif
+util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping);
 auto size = NodeHeader::get_byte_size_from_header(addr);
 bool crosses_mapping = offset + size > (1 << section_shift);
 // Move the limit on use of the existing primary mapping.
@@ -135,27 +133,21 @@ char* Allocator::translate_less_critical(RefTranslation* ref_translation_ptr, re
 }
 if (REALM_LIKELY(!crosses_mapping)) {
 // Array fits inside primary mapping, no new mapping needed.
-#if REALM_ENABLE_ENCRYPTION
-realm::util::encryption_read_barrier(addr, size, txl.encrypted_mapping, nullptr);
-#endif
+util::encryption_read_barrier(addr, size, txl.encrypted_mapping);
 return addr;
 }
-else {
-// we need a cross-over mapping. If one is already established, use that.
-auto xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_acquire);
-if (!xover_mapping_addr) {
-// we need to establish a xover mapping - or wait for another thread to finish
-// establishing one:
-const_cast<Allocator*>(this)->get_or_add_xover_mapping(txl, idx, offset, size);
-// reload (can be relaxed since the call above synchronizes on a mutex)
-xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_relaxed);
-}
-// array is now known to be inside the established xover mapping:
-addr = xover_mapping_addr + (offset - txl.xover_mapping_base);
-#if REALM_ENABLE_ENCRYPTION
-realm::util::encryption_read_barrier(addr, size, txl.xover_encrypted_mapping, nullptr);
-#endif
-return addr;
-}
+// we need a cross-over mapping. If one is already established, use that.
+auto xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_acquire);
+if (!xover_mapping_addr) {
+// we need to establish a xover mapping - or wait for another thread to finish
+// establishing one:
+const_cast<Allocator*>(this)->get_or_add_xover_mapping(txl, idx, offset, size);
+// reload (can be relaxed since the call above synchronizes on a mutex)
+xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_relaxed);
+}
+// array is now known to be inside the established xover mapping:
+addr = xover_mapping_addr + (offset - txl.xover_mapping_base);
+util::encryption_read_barrier(addr, size, txl.xover_encrypted_mapping);
+return addr;
 }
 } // namespace realm
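Worth noting for the hunks above: call sites can drop both the
#if REALM_ENABLE_ENCRYPTION guards and the trailing nullptr argument because
of the null-object stub added to RefTranslation in alloc.hpp below, which
leaves encrypted_mapping as a null constant when encryption is compiled out.
A barrier helper can then simply short-circuit on null. A minimal sketch of
that shape (illustrative signature only, not the verbatim declaration from
realm/util/encrypted_file_mapping.hpp):

    #include <cstddef>

    namespace realm::util {
    class EncryptedFileMapping;

    // Illustrative: degrades to a no-op when there is no encrypted mapping.
    inline void encryption_read_barrier(const void* addr, std::size_t size,
                                        EncryptedFileMapping* mapping)
    {
        if (!mapping)
            return; // unencrypted file, or encryption compiled out
        // otherwise decrypt/refresh the 4k blocks covering [addr, addr + size)
    }
    } // namespace realm::util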
55 changes: 18 additions & 37 deletions src/realm/alloc.hpp
@@ -171,7 +171,7 @@ class Allocator {
 // into equal chunks.
 struct RefTranslation {
 char* mapping_addr;
-uint64_t cookie;
+uint64_t cookie = 0x1234567890;
 std::atomic<size_t> lowest_possible_xover_offset = 0;

 // member 'xover_mapping_addr' is used for memory synchronization of the fields
@@ -183,14 +183,12 @@
 #if REALM_ENABLE_ENCRYPTION
 util::EncryptedFileMapping* encrypted_mapping = nullptr;
 util::EncryptedFileMapping* xover_encrypted_mapping = nullptr;
+#else
+static inline util::EncryptedFileMapping* const encrypted_mapping = nullptr;
+static inline util::EncryptedFileMapping* const xover_encrypted_mapping = nullptr;
 #endif
-explicit RefTranslation(char* addr)
+explicit RefTranslation(char* addr = nullptr)
 : mapping_addr(addr)
-, cookie(0x1234567890)
 {
 }
-RefTranslation()
-: RefTranslation(nullptr)
-{
-}
 ~RefTranslation()
@@ -222,7 +220,7 @@ };
 };
 // This pointer may be changed concurrently with access, so make sure it is
 // atomic!
-std::atomic<RefTranslation*> m_ref_translation_ptr;
+std::atomic<RefTranslation*> m_ref_translation_ptr{nullptr};

 /// The specified size must be divisible by 8, and must not be
 /// zero.
@@ -252,7 +250,7 @@
 char* translate_critical(RefTranslation*, ref_type ref) const noexcept;
 char* translate_less_critical(RefTranslation*, ref_type ref) const noexcept;
 virtual void get_or_add_xover_mapping(RefTranslation&, size_t, size_t, size_t) = 0;
-Allocator() noexcept;
+Allocator() noexcept = default;
 size_t get_section_index(size_t pos) const noexcept;
 inline size_t get_section_base(size_t index) const noexcept;

@@ -271,11 +269,9 @@
 // used to detect if the allocator (and owning structure, e.g. Table)
 // is recycled. Mismatch on this counter will cause accesors
 // lower in the hierarchy to throw if access is attempted.
-std::atomic<uint_fast64_t> m_content_versioning_counter;
-
-std::atomic<uint_fast64_t> m_storage_versioning_counter;
-
-std::atomic<uint_fast64_t> m_instance_versioning_counter;
+std::atomic<uint_fast64_t> m_content_versioning_counter{0};
+std::atomic<uint_fast64_t> m_storage_versioning_counter{0};
+std::atomic<uint_fast64_t> m_instance_versioning_counter{0};

 inline uint_fast64_t get_storage_version(uint64_t instance_version)
 {
@@ -547,14 +543,6 @@ inline bool Allocator::is_read_only(ref_type ref) const noexcept
 return ref < m_baseline.load(std::memory_order_relaxed);
 }

-inline Allocator::Allocator() noexcept
-{
-m_content_versioning_counter = 0;
-m_storage_versioning_counter = 0;
-m_instance_versioning_counter = 0;
-m_ref_translation_ptr = nullptr;
-}
-
 // performance critical part of the translation process. Less critical code is in translate_less_critical.
 inline char* Allocator::translate_critical(RefTranslation* ref_translation_ptr, ref_type ref) const noexcept
 {
@@ -566,30 +554,23 @@ inline char* Allocator::translate_critical(RefTranslation* ref_translation_ptr,
 if (REALM_LIKELY(offset < lowest_possible_xover_offset)) {
 // the lowest possible xover offset may grow concurrently, but that will not affect this code path
 char* addr = txl.mapping_addr + offset;
-#if REALM_ENABLE_ENCRYPTION
-realm::util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping,
-NodeHeader::get_byte_size_from_header);
-#endif
+util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping);
+size_t size = NodeHeader::get_byte_size_from_header(addr);
+util::encryption_read_barrier(addr, size, txl.encrypted_mapping);
 return addr;
 }
-else {
-// the lowest possible xover offset may grow concurrently, but that will be handled inside the call
-return translate_less_critical(ref_translation_ptr, ref);
-}
+// the lowest possible xover offset may grow concurrently, but that will be handled inside the call
+return translate_less_critical(ref_translation_ptr, ref);
 }
 realm::util::terminate("Invalid ref translation entry", __FILE__, __LINE__, txl.cookie, 0x1234567890, ref, idx);
 return nullptr;
 }

 inline char* Allocator::translate(ref_type ref) const noexcept
 {
-auto ref_translation_ptr = m_ref_translation_ptr.load(std::memory_order_acquire);
-if (REALM_LIKELY(ref_translation_ptr)) {
-return translate_critical(ref_translation_ptr, ref);
-}
-else {
-return do_translate(ref);
-}
+if (auto ptr = m_ref_translation_ptr.load(std::memory_order_acquire); REALM_LIKELY(ptr)) {
+return translate_critical(ptr, ref);
+}
+return do_translate(ref);
 }

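Most of the remaining alloc.hpp churn is one mechanical modernization applied
repeatedly: zero-initialization moves from the hand-written Allocator() body
into default member initializers, after which the constructor can be
defaulted. The pattern in isolation (a generic sketch, not realm-core code):

    #include <atomic>
    #include <cstdint>

    struct Before {
        std::atomic<uint64_t> counter; // zeroed by hand in the constructor
        Before() noexcept { counter = 0; }
    };

    struct After {
        std::atomic<uint64_t> counter{0}; // zeroed at its declaration
        After() noexcept = default;       // nothing left for a constructor to do
    };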
102 changes: 27 additions & 75 deletions src/realm/alloc_slab.cpp
@@ -16,15 +16,13 @@
 *
 **************************************************************************/

-#include <cinttypes>
-#include <type_traits>
-#include <exception>
-#include <algorithm>
-#include <memory>
-#include <mutex>
-#include <map>
 #include <atomic>
+#include <cinttypes>
+#include <cstring>
+#include <exception>
+#include <memory>
+#include <type_traits>

 #if REALM_DEBUG
 #include <iostream>
@@ -35,13 +33,13 @@
 #include <cstdlib>
 #endif

-#include <realm/util/errno.hpp>
 #include <realm/util/encrypted_file_mapping.hpp>
-#include <realm/util/terminate.hpp>
-#include <realm/util/thread.hpp>
+#include <realm/util/errno.hpp>
+#include <realm/util/scope_exit.hpp>
+#include <realm/util/terminate.hpp>
 #include <realm/array.hpp>
 #include <realm/alloc_slab.hpp>
 #include <realm/disable_sync_to_disk.hpp>
 #include <realm/group.hpp>

 using namespace realm;
Expand Down Expand Up @@ -164,9 +162,6 @@ void SlabAlloc::detach(bool keep_file_open) noexcept
// placed correctly (logically) after the end of the file.
m_slabs.clear();
clear_freelists();
#if REALM_ENABLE_ENCRYPTION
m_realm_file_info = nullptr;
#endif

m_attach_mode = attach_None;
}
@@ -661,7 +656,7 @@ int SlabAlloc::get_committed_file_format_version() noexcept
 // if we have mapped a file, m_mappings will have at least one mapping and
 // the first will be to the start of the file. Don't come here, if we're
 // just attaching a buffer. They don't have mappings.
-realm::util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header));
+util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header));
 }
 }
 const Header& header = *reinterpret_cast<const Header*>(m_data);
@@ -805,10 +800,6 @@ ref_type SlabAlloc::attach_file(const std::string& path, Config& cfg, util::Writ
 // the call below to set_encryption_key.
 m_file.set_encryption_key(cfg.encryption_key);

-note_reader_start(this);
-util::ScopeExit reader_end_guard([this]() noexcept {
-note_reader_end(this);
-});
 size_t size = 0;
 // The size of a database file must not exceed what can be encoded in
 // size_t.
Expand Down Expand Up @@ -840,26 +831,17 @@ ref_type SlabAlloc::attach_file(const std::string& path, Config& cfg, util::Writ
if (size == 0) {
if (REALM_UNLIKELY(cfg.read_only))
throw InvalidDatabase("Read-only access to empty Realm file", path);

size_t initial_size = page_size();
// exFAT does not allocate a unique id for the file until it is non-empty. It must be
// valid at this point because File::get_unique_id() is used to distinguish
// mappings_for_file in the encryption layer. So the prealloc() is required before
// interacting with the encryption layer in File::write().
// Pre-alloc initial space
m_file.prealloc(initial_size); // Throws
// seek() back to the start of the file in preparation for writing the header
// This sequence of File operations is protected from races by
// DB::m_controlmutex, so we know we are the only ones operating on the file
m_file.seek(0);
// We want all non-streaming files to be a multiple of the page size
// to simplify memory mapping, so just pre-reserve the required space now
m_file.prealloc(page_size()); // Throws
const char* data = reinterpret_cast<const char*>(&empty_file_header);
m_file.write(data, sizeof empty_file_header); // Throws
m_file.write(0, data, sizeof empty_file_header); // Throws

bool disable_sync = get_disable_sync_to_disk() || cfg.disable_sync;
if (!disable_sync)
m_file.sync(); // Throws

size = initial_size;
size = m_file.get_size();
}

ref_type top_ref = read_and_validate_header(m_file, path, size, cfg.session_initiator, m_write_observer);
@@ -883,12 +865,9 @@
 update_reader_view(size);
 REALM_ASSERT(m_mappings.size());
 m_data = m_mappings[0].primary_mapping.get_addr();
-realm::util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header));
+util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header));
 dg.release(); // Do not detach
 fcg.release(); // Do not close
-#if REALM_ENABLE_ENCRYPTION
-m_realm_file_info = util::get_file_info_for_file(m_file);
-#endif
 return top_ref;
 }

@@ -905,40 +884,20 @@ void SlabAlloc::convert_from_streaming_form(ref_type top_ref)
 {
 File::Map<Header> writable_map(m_file, File::access_ReadWrite, sizeof(Header)); // Throws
 Header& writable_header = *writable_map.get_addr();
-realm::util::encryption_read_barrier_for_write(writable_map, 0);
+util::encryption_read_barrier(writable_map, 0);
 writable_header.m_top_ref[1] = top_ref;
 writable_header.m_file_format[1] = writable_header.m_file_format[0];
 realm::util::encryption_write_barrier(writable_map, 0);
 writable_map.sync();
-realm::util::encryption_read_barrier_for_write(writable_map, 0);
+util::encryption_read_barrier(writable_map, 0);
 writable_header.m_flags |= flags_SelectBit;
 realm::util::encryption_write_barrier(writable_map, 0);
 writable_map.sync();

-realm::util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header));
+util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header));
 }
 }

-void SlabAlloc::note_reader_start(const void* reader_id)
-{
-#if REALM_ENABLE_ENCRYPTION
-if (m_realm_file_info)
-util::encryption_note_reader_start(*m_realm_file_info, reader_id);
-#else
-static_cast<void>(reader_id);
-#endif
-}
-
-void SlabAlloc::note_reader_end(const void* reader_id) noexcept
-{
-#if REALM_ENABLE_ENCRYPTION
-if (m_realm_file_info)
-util::encryption_note_reader_end(*m_realm_file_info, reader_id);
-#else
-static_cast<void>(reader_id);
-#endif
-}
-
 ref_type SlabAlloc::attach_buffer(const char* data, size_t size)
 {
 // ExceptionSafety: If this function throws, it must leave the allocator in
@@ -1009,8 +968,8 @@ ref_type SlabAlloc::read_and_validate_header(util::File& file, const std::string
 {
 try {
 // we'll read header and (potentially) footer
-File::Map<char> map_header(file, File::access_ReadOnly, sizeof(Header), 0, write_observer);
-realm::util::encryption_read_barrier(map_header, 0, sizeof(Header));
+File::Map<char> map_header(file, File::access_ReadOnly, sizeof(Header), write_observer);
+util::encryption_read_barrier(map_header, 0, sizeof(Header));
 auto header = reinterpret_cast<const Header*>(map_header.get_addr());

 File::Map<char> map_footer;
@@ -1020,12 +979,12 @@
 size_t footer_page_base = footer_ref & ~(page_size() - 1);
 size_t footer_offset = footer_ref - footer_page_base;
 map_footer = File::Map<char>(file, footer_page_base, File::access_ReadOnly,
-sizeof(StreamingFooter) + footer_offset, 0, write_observer);
-realm::util::encryption_read_barrier(map_footer, footer_offset, sizeof(StreamingFooter));
+sizeof(StreamingFooter) + footer_offset, write_observer);
+util::encryption_read_barrier(map_footer, footer_offset, sizeof(StreamingFooter));
 footer = reinterpret_cast<const StreamingFooter*>(map_footer.get_addr() + footer_offset);
 }

-auto top_ref = validate_header(header, footer, size, path, file.get_encryption_key() != nullptr); // Throws
+auto top_ref = validate_header(header, footer, size, path, file.get_encryption() != nullptr); // Throws

 if (session_initiator && is_file_on_streaming_form(*header)) {
 // Don't compare file format version fields as they are allowed to differ.
Expand Down Expand Up @@ -1278,7 +1237,7 @@ void SlabAlloc::update_reader_view(size_t file_size)
const size_t section_size = std::min<size_t>(1 << section_shift, file_size - section_start_offset);
if (section_size == (1 << section_shift)) {
new_mappings.push_back({util::File::Map<char>(m_file, section_start_offset, File::access_ReadOnly,
section_size, 0, m_write_observer)});
section_size, m_write_observer)});
}
else {
new_mappings.push_back({util::File::Map<char>()});
@@ -1291,7 +1250,7 @@
 throw std::bad_alloc();
 }
 else {
-new_mappings.back().primary_mapping.map(m_file, File::access_ReadOnly, section_size, 0,
+new_mappings.back().primary_mapping.map(m_file, File::access_ReadOnly, section_size,
 section_start_offset, m_write_observer);
 }
 }
@@ -1352,16 +1311,9 @@ void SlabAlloc::update_reader_view(size_t file_size)
 void SlabAlloc::schedule_refresh_of_outdated_encrypted_pages()
 {
 #if REALM_ENABLE_ENCRYPTION
-// callers must already hold m_mapping_mutex
-for (auto& e : m_mappings) {
-if (auto m = e.primary_mapping.get_encrypted_mapping()) {
-encryption_mark_pages_for_IV_check(m);
-}
-if (auto m = e.xover_mapping.get_encrypted_mapping()) {
-encryption_mark_pages_for_IV_check(m);
-}
-}
-// unsafe to do outside writing thread: verify();
+if (auto encryption = m_file.get_encryption()) {
+encryption->mark_data_as_possibly_stale();
+}
 #endif // REALM_ENABLE_ENCRYPTION
 }

Expand Down Expand Up @@ -1457,7 +1409,7 @@ void SlabAlloc::get_or_add_xover_mapping(RefTranslation& txl, size_t index, size
auto end_offset = file_offset + size;
auto mapping_file_offset = file_offset & ~(_page_size - 1);
auto minimal_mapping_size = end_offset - mapping_file_offset;
util::File::Map<char> mapping(m_file, mapping_file_offset, File::access_ReadOnly, minimal_mapping_size, 0,
util::File::Map<char> mapping(m_file, mapping_file_offset, File::access_ReadOnly, minimal_mapping_size,
m_write_observer);
map_entry->xover_mapping = std::move(mapping);
}
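The last hunk above shows the new per-file staleness model in miniature:
instead of walking m_mappings and flagging each EncryptedFileMapping for an
IV check, staleness is recorded once on the file-level encryption object.
Reassembled from the added lines (assuming, as the null check suggests, that
get_encryption() returns null for unencrypted files):

    void SlabAlloc::schedule_refresh_of_outdated_encrypted_pages()
    {
    #if REALM_ENABLE_ENCRYPTION
        // One flag on the file's encryption state rather than one per mapping.
        if (auto encryption = m_file.get_encryption()) {
            encryption->mark_data_as_possibly_stale();
        }
    #endif // REALM_ENABLE_ENCRYPTION
    }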
