From 95b96502e91cf5656295e5aa1ddb50f73a5e4b42 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Sat, 18 Oct 2025 22:01:56 -0700 Subject: [PATCH 1/8] hacky mmap change --- extension/data_loader/mmap_data_loader.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 10bd2f35f5e..541fc36bfa7 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -94,8 +94,13 @@ Result MmapDataLoader::from( } // Cache the file size. +#if defined(_WIN32) + struct _stat64 st; + int err = ::_fstat64(fd, &st); +#else struct stat st; int err = ::fstat(fd, &st); +#endif if (err < 0) { ET_LOG( Error, @@ -106,7 +111,15 @@ Result MmapDataLoader::from( ::close(fd); return Error::AccessFailed; } - size_t file_size = st.st_size; + + uint64_t file_size_u64 = static_cast(st.st_size); + ET_CHECK_OR_RETURN_ERROR( + file_size_u64 <= std::numeric_limits::max(), + NotSupported, + "File %s is too large (%llu bytes) for current platform", + file_name, + static_cast(file_size_u64)); + size_t file_size = static_cast(file_size_u64); // Copy the filename so we can print better debug messages if reads fail. const char* file_name_copy = ::strdup(file_name); From b0bee69d7ed7b26ffefdd967eac2909aab9166fe Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Sat, 18 Oct 2025 22:11:51 -0700 Subject: [PATCH 2/8] more hacky mmap edits --- extension/data_loader/mman_windows.cpp | 55 ++++++++++------------ extension/data_loader/mman_windows.h | 9 +++- extension/data_loader/mmap_data_loader.cpp | 23 +++++---- 3 files changed, 48 insertions(+), 39 deletions(-) diff --git a/extension/data_loader/mman_windows.cpp b/extension/data_loader/mman_windows.cpp index 89f9f22f467..847b2aefadd 100644 --- a/extension/data_loader/mman_windows.cpp +++ b/extension/data_loader/mman_windows.cpp @@ -21,6 +21,8 @@ #include #include +#include +#include #include #ifndef STATUS_SECTION_TOO_BIG @@ -129,49 +131,42 @@ static DWORD __map_mmap_prot_file(const int prot) { } // namespace -void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off) { +void* mmap( + void* addr, + size_t len, + int prot, + int flags, + int fildes, + std::uint64_t off) { HANDLE fm, h; - void* map = MAP_FAILED; -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4293) -#endif - - const DWORD dwFileOffsetLow = (sizeof(off_t) <= sizeof(DWORD)) - ? (DWORD)off - : (DWORD)(off & 0xFFFFFFFFL); - const DWORD dwFileOffsetHigh = (sizeof(off_t) <= sizeof(DWORD)) - ? (DWORD)0 - : (DWORD)((off >> 32) & 0xFFFFFFFFL); - const DWORD protect = __map_mmap_prot_page(prot); - const DWORD desiredAccess = __map_mmap_prot_file(prot); - - const off_t maxSize = off + (off_t)len; - - const DWORD dwMaxSizeLow = (sizeof(off_t) <= sizeof(DWORD)) - ? (DWORD)maxSize - : (DWORD)(maxSize & 0xFFFFFFFFL); - const DWORD dwMaxSizeHigh = (sizeof(off_t) <= sizeof(DWORD)) - ? (DWORD)0 - : (DWORD)((maxSize >> 32) & 0xFFFFFFFFL); - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - errno = 0; if (len == 0 /* Unsupported flag combinations */ || (flags & MAP_FIXED) != 0 - /* Usupported protection combinations */ + /* Unsupported protection combinations */ || prot == PROT_EXEC) { errno = EINVAL; return MAP_FAILED; } + if (off > std::numeric_limits::max() - len) { + errno = EINVAL; + return MAP_FAILED; + } + + const std::uint64_t maxSize = off + static_cast(len); + + const DWORD dwFileOffsetLow = static_cast(off & 0xFFFFFFFFULL); + const DWORD dwFileOffsetHigh = static_cast((off >> 32) & 0xFFFFFFFFULL); + const DWORD protect = __map_mmap_prot_page(prot); + const DWORD desiredAccess = __map_mmap_prot_file(prot); + + const DWORD dwMaxSizeLow = static_cast(maxSize & 0xFFFFFFFFULL); + const DWORD dwMaxSizeHigh = static_cast((maxSize >> 32) & 0xFFFFFFFFULL); + h = ((flags & MAP_ANONYMOUS) == 0) ? (HANDLE)_get_osfhandle(fildes) : INVALID_HANDLE_VALUE; diff --git a/extension/data_loader/mman_windows.h b/extension/data_loader/mman_windows.h index 563db5d8b21..b9e678c121d 100644 --- a/extension/data_loader/mman_windows.h +++ b/extension/data_loader/mman_windows.h @@ -31,6 +31,7 @@ #endif #include +#include #ifdef __cplusplus extern "C" { @@ -56,7 +57,13 @@ extern "C" { #define MS_SYNC 2 #define MS_INVALIDATE 4 -void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off); +void* mmap( + void* addr, + size_t len, + int prot, + int flags, + int fildes, + std::uint64_t off); int munmap(void* addr, size_t len); int mprotect(void* addr, size_t len, int prot); int msync(void* addr, size_t len, int flags); diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 541fc36bfa7..433de270703 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -180,12 +181,6 @@ Error MmapDataLoader::validate_input(size_t offset, size_t size) const { offset, size, file_size_); - ET_CHECK_OR_RETURN_ERROR( - // Recommended by a lint warning. - offset <= std::numeric_limits::max(), - InvalidArgument, - "Offset %zu too large for off_t", - offset); return Error::Ok; } @@ -220,13 +215,19 @@ Result MmapDataLoader::load( // Map the pages read-only. Use shared mappings so that other processes // can also map the same pages and share the same memory. +#if defined(_WIN32) + const std::uint64_t map_offset = static_cast(range.start); +#else + const off_t map_offset = static_cast(range.start); +#endif + void* pages = ::mmap( nullptr, map_size, PROT_READ, MAP_SHARED, fd_, - static_cast(range.start)); + map_offset); ET_CHECK_OR_RETURN_ERROR( pages != MAP_FAILED, AccessFailed, @@ -328,13 +329,19 @@ Error MmapDataLoader::load_into( // Map the pages read-only. MAP_PRIVATE vs. MAP_SHARED doesn't matter since // the data is read-only, but use PRIVATE just to further avoid accidentally // modifying the file. +#if defined(_WIN32) + const std::uint64_t map_offset = static_cast(range.start); +#else + const off_t map_offset = static_cast(range.start); +#endif + void* pages = ::mmap( nullptr, map_size, PROT_READ, MAP_PRIVATE, fd_, - static_cast(range.start)); + map_offset); ET_CHECK_OR_RETURN_ERROR( pages != MAP_FAILED, AccessFailed, From 247ee8d9b1cb9208b875db47c9ecc7119aefeca7 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Sat, 18 Oct 2025 22:15:32 -0700 Subject: [PATCH 3/8] minmax issue --- extension/data_loader/mman_windows.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/extension/data_loader/mman_windows.cpp b/extension/data_loader/mman_windows.cpp index 847b2aefadd..dec991376cf 100644 --- a/extension/data_loader/mman_windows.cpp +++ b/extension/data_loader/mman_windows.cpp @@ -23,7 +23,9 @@ #include #include #include +#define NOMINMAX #include +#undef NOMINMAX #ifndef STATUS_SECTION_TOO_BIG #define STATUS_SECTION_TOO_BIG 0xC0000040L From 4002f0063785835b0ae1217ab0b0a8f5d5ef714c Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 3 Nov 2025 15:18:29 -0800 Subject: [PATCH 4/8] clean up and add tests --- extension/data_loader/mman.h | 42 ++++++++++++ extension/data_loader/mman_windows.cpp | 2 +- extension/data_loader/mman_windows.h | 2 +- extension/data_loader/mmap_data_loader.cpp | 30 ++------- .../test/mmap_data_loader_test.cpp | 53 ++++++++++++++- extension/testing_util/temp_file.h | 66 ++++++++++++++++++- 6 files changed, 165 insertions(+), 30 deletions(-) diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h index 246068986ea..65e3d5aad2b 100644 --- a/extension/data_loader/mman.h +++ b/extension/data_loader/mman.h @@ -16,18 +16,41 @@ #ifndef _WIN32 #include +#include #include + ET_INLINE size_t get_os_page_size() { return sysconf(_SC_PAGESIZE); } +/** + * Platform-specific file stat function. + */ +ET_INLINE int get_file_stat(int fd, size_t* out_size) { + struct stat st; + int err = ::fstat(fd, &st); + if (err >= 0) { + *out_size = static_cast(st.st_size); + } + return err; +} + +/** + * Platform-specific mmap offset type conversion. + */ +ET_INLINE off_t get_mmap_offset(size_t offset) { + return static_cast(offset); +} + #else #define NOMINMAX #include #undef NOMINMAX #include +#include + #include @@ -40,4 +63,23 @@ ET_INLINE long get_os_page_size() { return pagesize; } +/** + * Platform-specific file stat function. + */ +ET_INLINE int get_file_stat(int fd, size_t* out_size) { + struct _stat64 st; + int err = ::_fstat64(fd, &st); + if (err >= 0) { + *out_size = static_cast(st.st_size); + } + return err; +} + +/** + * Platform-specific mmap offset type conversion. + */ +ET_INLINE std::uint64_t get_mmap_offset(size_t offset) { + return static_cast(offset); +} + #endif diff --git a/extension/data_loader/mman_windows.cpp b/extension/data_loader/mman_windows.cpp index dec991376cf..2e70ec43a6f 100644 --- a/extension/data_loader/mman_windows.cpp +++ b/extension/data_loader/mman_windows.cpp @@ -139,7 +139,7 @@ void* mmap( int prot, int flags, int fildes, - std::uint64_t off) { + uint64_t off) { HANDLE fm, h; void* map = MAP_FAILED; diff --git a/extension/data_loader/mman_windows.h b/extension/data_loader/mman_windows.h index b9e678c121d..36e65bcda3c 100644 --- a/extension/data_loader/mman_windows.h +++ b/extension/data_loader/mman_windows.h @@ -63,7 +63,7 @@ void* mmap( int prot, int flags, int fildes, - std::uint64_t off); + uint64_t off); int munmap(void* addr, size_t len); int mprotect(void* addr, size_t len, int prot); int msync(void* addr, size_t len, int flags); diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 433de270703..5f64302298b 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -95,13 +95,8 @@ Result MmapDataLoader::from( } // Cache the file size. -#if defined(_WIN32) - struct _stat64 st; - int err = ::_fstat64(fd, &st); -#else - struct stat st; - int err = ::fstat(fd, &st); -#endif + size_t file_size; + int err = get_file_stat(fd, &file_size); if (err < 0) { ET_LOG( Error, @@ -113,15 +108,6 @@ Result MmapDataLoader::from( return Error::AccessFailed; } - uint64_t file_size_u64 = static_cast(st.st_size); - ET_CHECK_OR_RETURN_ERROR( - file_size_u64 <= std::numeric_limits::max(), - NotSupported, - "File %s is too large (%llu bytes) for current platform", - file_name, - static_cast(file_size_u64)); - size_t file_size = static_cast(file_size_u64); - // Copy the filename so we can print better debug messages if reads fail. const char* file_name_copy = ::strdup(file_name); if (file_name_copy == nullptr) { @@ -215,11 +201,7 @@ Result MmapDataLoader::load( // Map the pages read-only. Use shared mappings so that other processes // can also map the same pages and share the same memory. -#if defined(_WIN32) - const std::uint64_t map_offset = static_cast(range.start); -#else - const off_t map_offset = static_cast(range.start); -#endif + const auto map_offset = get_mmap_offset(range.start); void* pages = ::mmap( nullptr, @@ -329,11 +311,7 @@ Error MmapDataLoader::load_into( // Map the pages read-only. MAP_PRIVATE vs. MAP_SHARED doesn't matter since // the data is read-only, but use PRIVATE just to further avoid accidentally // modifying the file. -#if defined(_WIN32) - const std::uint64_t map_offset = static_cast(range.start); -#else - const off_t map_offset = static_cast(range.start); -#endif + const auto map_offset = get_mmap_offset(range.start); void* pages = ::mmap( nullptr, diff --git a/extension/data_loader/test/mmap_data_loader_test.cpp b/extension/data_loader/test/mmap_data_loader_test.cpp index 76b972c46d0..9d2ce50e35e 100644 --- a/extension/data_loader/test/mmap_data_loader_test.cpp +++ b/extension/data_loader/test/mmap_data_loader_test.cpp @@ -9,6 +9,7 @@ #include #include +#include #include @@ -428,4 +429,54 @@ TEST_F(MmapDataLoaderTest, LoadIntoCopiesOffsetCorrectly) { // Verify memory copied correctly. EXPECT_EQ(0, std::memcmp(dst, contents + offset, size)); -} \ No newline at end of file +} + +// Tests that the loader can handle files requiring 64-bit file systems. +// This test verifies that offsets and sizes beyond 32-bit limits are handled +// correctly by creating a sparse file with data at a large offset. +TEST_F(MmapDataLoaderTest, LargeFileOffsetSupport) { + // Create a sparse file with a marker at an offset beyond 2GB (32-bit limit). + // We use 3GB to ensure we're testing 64-bit offset handling. + const size_t large_offset = 3ULL * 1024 * 1024 * 1024; // 3GB + const std::string test_marker = "TEST_MARKER_AT_LARGE_OFFSET"; + + // Use TempFile sparse file API to create a 3GB+ file + TempFile tf(large_offset, test_marker, large_offset + test_marker.size()); + + // Now try to load the data using MmapDataLoader. + Result mdl = MmapDataLoader::from(tf.path().c_str()); + ASSERT_EQ(mdl.error(), Error::Ok) + << "Failed to create MmapDataLoader for large sparse file"; + + // Verify the file size is reported correctly (should be > 3GB). + Result file_size = mdl->size(); + ASSERT_EQ(file_size.error(), Error::Ok); + EXPECT_GT(*file_size, large_offset) + << "File size should be larger than the large offset"; + EXPECT_EQ(*file_size, large_offset + test_marker.size()) + << "File size should match offset + marker size"; + + // Try to load the marker data from the large offset. + Result fb = mdl->load( + large_offset, + test_marker.size(), + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program)); + ASSERT_EQ(fb.error(), Error::Ok) + << "Failed to load data from large offset"; + + EXPECT_EQ(fb->size(), test_marker.size()); + EXPECT_EQ(0, std::memcmp(fb->data(), test_marker.data(), test_marker.size())) + << "Data at large offset does not match expected marker"; + + // Test load_into as well. + std::vector buffer(test_marker.size()); + Error err = mdl->load_into( + large_offset, + test_marker.size(), + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program), + buffer.data()); + ASSERT_EQ(err, Error::Ok) << "load_into failed for large offset"; + + EXPECT_EQ(0, std::memcmp(buffer.data(), test_marker.data(), test_marker.size())) + << "load_into data at large offset does not match expected marker"; +} diff --git a/extension/testing_util/temp_file.h b/extension/testing_util/temp_file.h index 4edaf2135d8..a065b460937 100644 --- a/extension/testing_util/temp_file.h +++ b/extension/testing_util/temp_file.h @@ -12,8 +12,8 @@ #include #include #include +#include -#include // open() #include namespace executorch { @@ -42,6 +42,27 @@ class TempFile { CreateFile(data, size, &path_); } + /** + * Creates a sparse temporary file with a string at a specific offset. + * The file will have the specified total size, but only the data at the + * given offset will be written, creating a sparse file that doesn't + * allocate all the disk space. + * + * Example: + * // Create a 3GB file with "DATA_AT_3GB" at 3GB offset + * size_t offset = 3ULL * 1024 * 1024 * 1024; + * std::string data = "DATA_AT_3GB"; + * TempFile tf(offset, data, offset + data.size()); + * + * @param offset Byte offset where the string should be written + * @param data String to write at the specified offset + * @param file_size Total size of the sparse file (must be >= offset + + * data.size()) + */ + TempFile(size_t offset, const std::string& data, size_t file_size) { + CreateSparseFile(offset, data, file_size, &path_); + } + ~TempFile() { if (!path_.empty()) { std::remove(path_.c_str()); @@ -81,6 +102,49 @@ class TempFile { *out_path = path; } + void CreateSparseFile( + size_t offset, + const std::string& data, + size_t file_size, + std::string* out_path) { + ASSERT_GE(file_size, offset + data.size()) + << "file_size must be >= offset + data.size()"; + + // Find a unique temporary file name. + std::string path; + { + std::array buf; + const char* ret = std::tmpnam(buf.data()); + ASSERT_NE(ret, nullptr) << "Could not generate temp file"; + buf[L_tmpnam - 1] = '\0'; + path = std::string(buf.data()) + "-executorch-testing"; + } + + // Open file in binary mode for writing. + std::ofstream file(path, std::ios::out | std::ios::binary); + ASSERT_TRUE(file.is_open()) + << "open(" << path << ") failed: " << strerror(errno); + + // Seek to the offset. + file.seekp(offset, std::ios::beg); + ASSERT_TRUE(file.good()) << "Failed to seek to offset " << offset; + + // Write the data. + file.write(data.data(), data.size()); + ASSERT_TRUE(file.good()) << "Failed to write " << data.size() + << " bytes at offset " << offset; + + // Ensure file is the specified size by seeking to the end. + file.seekp(file_size - 1, std::ios::beg); + + // Write a single byte to ensure file has the correct size. + file.write("\0", 1); + file.close(); + ASSERT_TRUE(file.good() || file.eof()) << "Error closing file: " << path; + + *out_path = path; + } + // Not safely copyable/movable. TempFile(const TempFile&) = delete; TempFile& operator=(const TempFile&) = delete; From 5c8d4f9b93bbc5afd170ee02e49558852515826a Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 3 Nov 2025 15:18:59 -0800 Subject: [PATCH 5/8] lint --- extension/data_loader/mman.h | 2 -- extension/data_loader/mman_windows.cpp | 8 +++++--- extension/data_loader/mmap_data_loader.cpp | 20 +++++-------------- .../test/mmap_data_loader_test.cpp | 6 +++--- extension/testing_util/temp_file.h | 4 ++-- 5 files changed, 15 insertions(+), 25 deletions(-) diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h index 65e3d5aad2b..acd48a78113 100644 --- a/extension/data_loader/mman.h +++ b/extension/data_loader/mman.h @@ -19,7 +19,6 @@ #include #include - ET_INLINE size_t get_os_page_size() { return sysconf(_SC_PAGESIZE); } @@ -51,7 +50,6 @@ ET_INLINE off_t get_mmap_offset(size_t offset) { #include #include - #include ET_INLINE long get_os_page_size() { diff --git a/extension/data_loader/mman_windows.cpp b/extension/data_loader/mman_windows.cpp index 2e70ec43a6f..9bc655a7827 100644 --- a/extension/data_loader/mman_windows.cpp +++ b/extension/data_loader/mman_windows.cpp @@ -21,8 +21,8 @@ #include #include -#include #include +#include #define NOMINMAX #include #undef NOMINMAX @@ -162,12 +162,14 @@ void* mmap( const std::uint64_t maxSize = off + static_cast(len); const DWORD dwFileOffsetLow = static_cast(off & 0xFFFFFFFFULL); - const DWORD dwFileOffsetHigh = static_cast((off >> 32) & 0xFFFFFFFFULL); + const DWORD dwFileOffsetHigh = + static_cast((off >> 32) & 0xFFFFFFFFULL); const DWORD protect = __map_mmap_prot_page(prot); const DWORD desiredAccess = __map_mmap_prot_file(prot); const DWORD dwMaxSizeLow = static_cast(maxSize & 0xFFFFFFFFULL); - const DWORD dwMaxSizeHigh = static_cast((maxSize >> 32) & 0xFFFFFFFFULL); + const DWORD dwMaxSizeHigh = + static_cast((maxSize >> 32) & 0xFFFFFFFFULL); h = ((flags & MAP_ANONYMOUS) == 0) ? (HANDLE)_get_osfhandle(fildes) : INVALID_HANDLE_VALUE; diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 5f64302298b..2271d5a3690 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -9,9 +9,9 @@ #include #include +#include #include #include -#include #include #include @@ -203,13 +203,8 @@ Result MmapDataLoader::load( // can also map the same pages and share the same memory. const auto map_offset = get_mmap_offset(range.start); - void* pages = ::mmap( - nullptr, - map_size, - PROT_READ, - MAP_SHARED, - fd_, - map_offset); + void* pages = + ::mmap(nullptr, map_size, PROT_READ, MAP_SHARED, fd_, map_offset); ET_CHECK_OR_RETURN_ERROR( pages != MAP_FAILED, AccessFailed, @@ -313,13 +308,8 @@ Error MmapDataLoader::load_into( // modifying the file. const auto map_offset = get_mmap_offset(range.start); - void* pages = ::mmap( - nullptr, - map_size, - PROT_READ, - MAP_PRIVATE, - fd_, - map_offset); + void* pages = + ::mmap(nullptr, map_size, PROT_READ, MAP_PRIVATE, fd_, map_offset); ET_CHECK_OR_RETURN_ERROR( pages != MAP_FAILED, AccessFailed, diff --git a/extension/data_loader/test/mmap_data_loader_test.cpp b/extension/data_loader/test/mmap_data_loader_test.cpp index 9d2ce50e35e..519d38accb6 100644 --- a/extension/data_loader/test/mmap_data_loader_test.cpp +++ b/extension/data_loader/test/mmap_data_loader_test.cpp @@ -461,8 +461,7 @@ TEST_F(MmapDataLoaderTest, LargeFileOffsetSupport) { large_offset, test_marker.size(), DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program)); - ASSERT_EQ(fb.error(), Error::Ok) - << "Failed to load data from large offset"; + ASSERT_EQ(fb.error(), Error::Ok) << "Failed to load data from large offset"; EXPECT_EQ(fb->size(), test_marker.size()); EXPECT_EQ(0, std::memcmp(fb->data(), test_marker.data(), test_marker.size())) @@ -477,6 +476,7 @@ TEST_F(MmapDataLoaderTest, LargeFileOffsetSupport) { buffer.data()); ASSERT_EQ(err, Error::Ok) << "load_into failed for large offset"; - EXPECT_EQ(0, std::memcmp(buffer.data(), test_marker.data(), test_marker.size())) + EXPECT_EQ( + 0, std::memcmp(buffer.data(), test_marker.data(), test_marker.size())) << "load_into data at large offset does not match expected marker"; } diff --git a/extension/testing_util/temp_file.h b/extension/testing_util/temp_file.h index a065b460937..3f43d662ec6 100644 --- a/extension/testing_util/temp_file.h +++ b/extension/testing_util/temp_file.h @@ -131,8 +131,8 @@ class TempFile { // Write the data. file.write(data.data(), data.size()); - ASSERT_TRUE(file.good()) << "Failed to write " << data.size() - << " bytes at offset " << offset; + ASSERT_TRUE(file.good()) + << "Failed to write " << data.size() << " bytes at offset " << offset; // Ensure file is the specified size by seeking to the end. file.seekp(file_size - 1, std::ios::beg); From 8b019fe0b7888fa898999cdf147978ac5d5c6fea Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 3 Nov 2025 15:39:10 -0800 Subject: [PATCH 6/8] minor test fix --- .../data_loader/test/mmap_data_loader_test.cpp | 7 +++++++ extension/testing_util/temp_file.h | 17 +++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/extension/data_loader/test/mmap_data_loader_test.cpp b/extension/data_loader/test/mmap_data_loader_test.cpp index 519d38accb6..df071fd7474 100644 --- a/extension/data_loader/test/mmap_data_loader_test.cpp +++ b/extension/data_loader/test/mmap_data_loader_test.cpp @@ -435,6 +435,13 @@ TEST_F(MmapDataLoaderTest, LoadIntoCopiesOffsetCorrectly) { // This test verifies that offsets and sizes beyond 32-bit limits are handled // correctly by creating a sparse file with data at a large offset. TEST_F(MmapDataLoaderTest, LargeFileOffsetSupport) { +// We run some 32 bit tests on Linux so we need to skip this +// test. +#ifndef _WIN32 + if (sizeof(off_t) <= 8) { + return; + } +#endif // Create a sparse file with a marker at an offset beyond 2GB (32-bit limit). // We use 3GB to ensure we're testing 64-bit offset handling. const size_t large_offset = 3ULL * 1024 * 1024 * 1024; // 3GB diff --git a/extension/testing_util/temp_file.h b/extension/testing_util/temp_file.h index 3f43d662ec6..0310a5dbeff 100644 --- a/extension/testing_util/temp_file.h +++ b/extension/testing_util/temp_file.h @@ -134,11 +134,20 @@ class TempFile { ASSERT_TRUE(file.good()) << "Failed to write " << data.size() << " bytes at offset " << offset; - // Ensure file is the specified size by seeking to the end. - file.seekp(file_size - 1, std::ios::beg); + // Ensure file is the specified size by seeking to the end and writing a + // byte, but only if the file needs to be extended beyond the data we just + // wrote. + if (file_size > offset + data.size()) { + file.seekp(file_size - 1, std::ios::beg); + ASSERT_TRUE(file.good()) + << "Failed to seek to file_size - 1: " << file_size - 1; + + // Write a single byte to ensure file has the correct size. + file.write("\0", 1); + ASSERT_TRUE(file.good()) + << "Failed to write final byte at position " << file_size - 1; + } - // Write a single byte to ensure file has the correct size. - file.write("\0", 1); file.close(); ASSERT_TRUE(file.good() || file.eof()) << "Error closing file: " << path; From 9e97930880bd095a2b332557e758bd05e68eb48e Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 3 Nov 2025 15:53:10 -0800 Subject: [PATCH 7/8] dedupe include --- extension/data_loader/mman.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h index acd48a78113..6920435db32 100644 --- a/extension/data_loader/mman.h +++ b/extension/data_loader/mman.h @@ -12,11 +12,11 @@ #pragma once #include +#include #ifndef _WIN32 #include -#include #include ET_INLINE size_t get_os_page_size() { @@ -48,7 +48,6 @@ ET_INLINE off_t get_mmap_offset(size_t offset) { #include #undef NOMINMAX #include -#include #include From 8d2761a57d27a0dd3b0e115ae04ab7412e1375ca Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Mon, 3 Nov 2025 15:55:55 -0800 Subject: [PATCH 8/8] more minor tweeks --- extension/data_loader/mman.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h index 6920435db32..26a9ee08067 100644 --- a/extension/data_loader/mman.h +++ b/extension/data_loader/mman.h @@ -13,6 +13,7 @@ #include #include +#include #ifndef _WIN32 @@ -75,8 +76,8 @@ ET_INLINE int get_file_stat(int fd, size_t* out_size) { /** * Platform-specific mmap offset type conversion. */ -ET_INLINE std::uint64_t get_mmap_offset(size_t offset) { - return static_cast(offset); +ET_INLINE uint64_t get_mmap_offset(size_t offset) { + return static_cast(offset); } #endif