From 60b49fc77adf83315b6c2080cd8047e245867096 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 12 Aug 2024 11:52:10 -0700 Subject: [PATCH] Implement load_into for file data loader Summary: Title Differential Revision: D61147536 --- extension/data_loader/file_data_loader.cpp | 109 +++++++++++++-------- extension/data_loader/file_data_loader.h | 6 ++ 2 files changed, 74 insertions(+), 41 deletions(-) diff --git a/extension/data_loader/file_data_loader.cpp b/extension/data_loader/file_data_loader.cpp index 7b041fef002..bf06d0c9bee 100644 --- a/extension/data_loader/file_data_loader.cpp +++ b/extension/data_loader/file_data_loader.cpp @@ -49,7 +49,6 @@ static uint8_t* align_pointer(void* ptr, size_t alignment) { addr = (addr | (alignment - 1)) + 1; return reinterpret_cast(addr); } - } // namespace FileDataLoader::~FileDataLoader() { @@ -143,19 +142,6 @@ Result FileDataLoader::load( return FreeableBuffer(nullptr, 0, /*free_fn=*/nullptr); } - // Seek to the right place in the file. - off_t seek_offset = ::lseek(fd_, offset, SEEK_SET); - if (seek_offset != offset) { - ET_LOG( - Error, - "Seeking %s to offset %zu returned %zd: %s", - file_name_, - offset, - (ssize_t)seek_offset, - strerror(errno)); - return Error::AccessFailed; - } - // Allocate memory for the FreeableBuffer. size_t alloc_size = size; if (alignment_ > alignof(std::max_align_t)) { @@ -187,9 +173,75 @@ Result FileDataLoader::load( buffer, alloc_size); + auto err = load_into(offset, size, segment_info, aligned_buffer); + if (err != Error::Ok) { + // Free `buffer`, which is what malloc() gave us, not `aligned_buffer`. + std::free(buffer); + return err; + } + + // We can't naively free this pointer, since it may not be what malloc() gave + // us. Pass the offset to the real buffer as context. This is the number of + // bytes that need to be subtracted from the FreeableBuffer::data() pointer to + // find the actual pointer to free. + return FreeableBuffer( + aligned_buffer, + size, + FreeSegment, + /*free_fn_context=*/ + reinterpret_cast( + // Using signed types here because it will produce a signed ptrdiff_t + // value, though for us it will always be non-negative. + reinterpret_cast(aligned_buffer) - + reinterpret_cast(buffer))); +} + +Result FileDataLoader::size() const { + ET_CHECK_OR_RETURN_ERROR( + // Probably had its value moved to another instance. + fd_ >= 0, + InvalidState, + "Uninitialized"); + return file_size_; +} + +__ET_NODISCARD Error FileDataLoader::load_into( + size_t offset, + size_t size, + __ET_UNUSED const SegmentInfo& segment_info, + void* buffer) { + ET_CHECK_OR_RETURN_ERROR( + // Probably had its value moved to another instance. + fd_ >= 0, + InvalidState, + "Uninitialized"); + ET_CHECK_OR_RETURN_ERROR( + offset + size <= file_size_, + InvalidArgument, + "File %s: offset %zu + size %zu > file_size_ %zu", + file_name_, + offset, + size, + file_size_); + ET_CHECK_OR_RETURN_ERROR( + buffer != nullptr, InvalidArgument, "Provided buffer cannot be null"); + + // Seek to the right place in the file. + off_t seek_offset = ::lseek(fd_, offset, SEEK_SET); + if (seek_offset != offset) { + ET_LOG( + Error, + "Seeking %s to offset %zu returned %zd: %s", + file_name_, + offset, + (ssize_t)seek_offset, + strerror(errno)); + return Error::AccessFailed; + } + // Read the data into the aligned address. size_t needed = size; - uint8_t* buf = reinterpret_cast(aligned_buffer); + uint8_t* buf = reinterpret_cast(buffer); while (needed > 0) { // Reads on macos will fail with EINVAL if size > INT32_MAX. ssize_t nread = ::read( @@ -211,37 +263,12 @@ Result FileDataLoader::load( size, offset, nread == 0 ? "EOF" : strerror(errno)); - // Free `buffer`, which is what malloc() gave us, not `aligned_buffer`. - std::free(buffer); return Error::AccessFailed; } needed -= nread; buf += nread; } - - // We can't naively free this pointer, since it may not be what malloc() gave - // us. Pass the offset to the real buffer as context. This is the number of - // bytes that need to be subtracted from the FreeableBuffer::data() pointer to - // find the actual pointer to free. - return FreeableBuffer( - aligned_buffer, - size, - FreeSegment, - /*free_fn_context=*/ - reinterpret_cast( - // Using signed types here because it will produce a signed ptrdiff_t - // value, though for us it will always be non-negative. - reinterpret_cast(aligned_buffer) - - reinterpret_cast(buffer))); -} - -Result FileDataLoader::size() const { - ET_CHECK_OR_RETURN_ERROR( - // Probably had its value moved to another instance. - fd_ >= 0, - InvalidState, - "Uninitialized"); - return file_size_; + return Error::Ok; } } // namespace util diff --git a/extension/data_loader/file_data_loader.h b/extension/data_loader/file_data_loader.h index c6ab25933a5..b7cfe3a1b98 100644 --- a/extension/data_loader/file_data_loader.h +++ b/extension/data_loader/file_data_loader.h @@ -72,6 +72,12 @@ class FileDataLoader : public DataLoader { __ET_NODISCARD Result size() const override; + __ET_NODISCARD Error load_into( + size_t offset, + size_t size, + __ET_UNUSED const SegmentInfo& segment_info, + void* buffer) override; + private: FileDataLoader( int fd,