Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions extension/data_loader/mman.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#pragma once

#include <executorch/runtime/platform/compiler.h>
#include <sys/stat.h>
#include <cerrno>
#include <cstdint>

#ifndef _WIN32

Expand All @@ -22,6 +24,25 @@ ET_INLINE size_t get_os_page_size() {
return sysconf(_SC_PAGESIZE);
}

/**
 * Platform-specific file stat function.
 *
 * Queries the size of the file referred to by `fd` using fstat().
 *
 * @param[in] fd File descriptor to query.
 * @param[out] out_size Receives the file size in bytes. Written only on
 *     success; left untouched on failure.
 * @returns The non-negative fstat() result on success. A negative value on
 *     failure: either the fstat() result itself, or -1 with errno set to
 *     EOVERFLOW when the reported size is negative or cannot be represented
 *     in a size_t.
 */
ET_INLINE int get_file_stat(int fd, size_t* out_size) {
  struct stat st;
  const int err = ::fstat(fd, &st);
  if (err < 0) {
    return err;
  }

  // st_size may be wider than size_t on some targets. Round-trip the value
  // through size_t so that a size which does not fit is reported as an error
  // instead of being silently truncated.
  const size_t narrowed = static_cast<size_t>(st.st_size);
  if (st.st_size < 0 ||
      static_cast<decltype(st.st_size)>(narrowed) != st.st_size) {
    errno = EOVERFLOW;
    return -1;
  }

  *out_size = narrowed;
  return err;
}

/**
 * Platform-specific mmap offset type conversion.
 *
 * Converts a size_t byte offset into the off_t type used for the mmap()
 * offset argument on this platform.
 *
 * NOTE(review): the cast is unchecked; if off_t is narrower than size_t an
 * out-of-range offset would not be detected here -- confirm callers validate.
 */
ET_INLINE off_t get_mmap_offset(size_t offset) {
  const off_t converted = static_cast<off_t>(offset);
  return converted;
}

#else

#define NOMINMAX
Expand All @@ -40,4 +61,23 @@ ET_INLINE long get_os_page_size() {
return pagesize;
}

/**
* Platform-specific file stat function.
*/
ET_INLINE int get_file_stat(int fd, size_t* out_size) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, having out_size as a size_t* should not pose an issue, since we wouldn't be able to map more than 4 GB on a 32-bit system anyway, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah

struct _stat64 st;
int err = ::_fstat64(fd, &st);
if (err >= 0) {
*out_size = static_cast<size_t>(st.st_size);
}
return err;
}

/**
 * Platform-specific mmap offset type conversion.
 *
 * Widens a size_t byte offset to the uint64_t offset type returned on this
 * platform.
 */
ET_INLINE uint64_t get_mmap_offset(size_t offset) {
  const uint64_t converted = static_cast<uint64_t>(offset);
  return converted;
}

#endif
59 changes: 29 additions & 30 deletions extension/data_loader/mman_windows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@

#include <errno.h>
#include <io.h>
#include <cstdint>
#include <limits>
#define NOMINMAX
#include <windows.h>
#undef NOMINMAX

#ifndef STATUS_SECTION_TOO_BIG
#define STATUS_SECTION_TOO_BIG 0xC0000040L
Expand Down Expand Up @@ -129,49 +133,44 @@ static DWORD __map_mmap_prot_file(const int prot) {

} // namespace

void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off) {
void* mmap(
void* addr,
size_t len,
int prot,
int flags,
int fildes,
uint64_t off) {
HANDLE fm, h;

void* map = MAP_FAILED;

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4293)
#endif

const DWORD dwFileOffsetLow = (sizeof(off_t) <= sizeof(DWORD))
? (DWORD)off
: (DWORD)(off & 0xFFFFFFFFL);
const DWORD dwFileOffsetHigh = (sizeof(off_t) <= sizeof(DWORD))
? (DWORD)0
: (DWORD)((off >> 32) & 0xFFFFFFFFL);
const DWORD protect = __map_mmap_prot_page(prot);
const DWORD desiredAccess = __map_mmap_prot_file(prot);

const off_t maxSize = off + (off_t)len;

const DWORD dwMaxSizeLow = (sizeof(off_t) <= sizeof(DWORD))
? (DWORD)maxSize
: (DWORD)(maxSize & 0xFFFFFFFFL);
const DWORD dwMaxSizeHigh = (sizeof(off_t) <= sizeof(DWORD))
? (DWORD)0
: (DWORD)((maxSize >> 32) & 0xFFFFFFFFL);

#ifdef _MSC_VER
#pragma warning(pop)
#endif

errno = 0;

if (len == 0
/* Unsupported flag combinations */
|| (flags & MAP_FIXED) != 0
/* Usupported protection combinations */
/* Unsupported protection combinations */
|| prot == PROT_EXEC) {
errno = EINVAL;
return MAP_FAILED;
}

if (off > std::numeric_limits<std::uint64_t>::max() - len) {
errno = EINVAL;
return MAP_FAILED;
}

const std::uint64_t maxSize = off + static_cast<std::uint64_t>(len);

const DWORD dwFileOffsetLow = static_cast<DWORD>(off & 0xFFFFFFFFULL);
const DWORD dwFileOffsetHigh =
static_cast<DWORD>((off >> 32) & 0xFFFFFFFFULL);
const DWORD protect = __map_mmap_prot_page(prot);
const DWORD desiredAccess = __map_mmap_prot_file(prot);

const DWORD dwMaxSizeLow = static_cast<DWORD>(maxSize & 0xFFFFFFFFULL);
const DWORD dwMaxSizeHigh =
static_cast<DWORD>((maxSize >> 32) & 0xFFFFFFFFULL);

h = ((flags & MAP_ANONYMOUS) == 0) ? (HANDLE)_get_osfhandle(fildes)
: INVALID_HANDLE_VALUE;

Expand Down
9 changes: 8 additions & 1 deletion extension/data_loader/mman_windows.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#endif

#include <sys/types.h>
#include <cstdint>

#ifdef __cplusplus
extern "C" {
Expand All @@ -56,7 +57,13 @@ extern "C" {
#define MS_SYNC 2
#define MS_INVALIDATE 4

void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off);
void* mmap(
void* addr,
size_t len,
int prot,
int flags,
int fildes,
uint64_t off);
int munmap(void* addr, size_t len);
int mprotect(void* addr, size_t len, int prot);
int msync(void* addr, size_t len, int flags);
Expand Down
34 changes: 11 additions & 23 deletions extension/data_loader/mmap_data_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <executorch/extension/data_loader/mmap_data_loader.h>

#include <cerrno>
#include <cstdint>
#include <cstring>
#include <limits>

Expand Down Expand Up @@ -94,8 +95,8 @@ Result<MmapDataLoader> MmapDataLoader::from(
}

// Cache the file size.
struct stat st;
int err = ::fstat(fd, &st);
size_t file_size;
int err = get_file_stat(fd, &file_size);
if (err < 0) {
ET_LOG(
Error,
Expand All @@ -106,7 +107,6 @@ Result<MmapDataLoader> MmapDataLoader::from(
::close(fd);
return Error::AccessFailed;
}
size_t file_size = st.st_size;

// Copy the filename so we can print better debug messages if reads fail.
const char* file_name_copy = ::strdup(file_name);
Expand Down Expand Up @@ -167,12 +167,6 @@ Error MmapDataLoader::validate_input(size_t offset, size_t size) const {
offset,
size,
file_size_);
ET_CHECK_OR_RETURN_ERROR(
// Recommended by a lint warning.
offset <= std::numeric_limits<off_t>::max(),
InvalidArgument,
"Offset %zu too large for off_t",
offset);
return Error::Ok;
}

Expand Down Expand Up @@ -207,13 +201,10 @@ Result<FreeableBuffer> MmapDataLoader::load(

// Map the pages read-only. Use shared mappings so that other processes
// can also map the same pages and share the same memory.
void* pages = ::mmap(
nullptr,
map_size,
PROT_READ,
MAP_SHARED,
fd_,
static_cast<off_t>(range.start));
const auto map_offset = get_mmap_offset(range.start);

void* pages =
::mmap(nullptr, map_size, PROT_READ, MAP_SHARED, fd_, map_offset);
ET_CHECK_OR_RETURN_ERROR(
pages != MAP_FAILED,
AccessFailed,
Expand Down Expand Up @@ -315,13 +306,10 @@ Error MmapDataLoader::load_into(
// Map the pages read-only. MAP_PRIVATE vs. MAP_SHARED doesn't matter since
// the data is read-only, but use PRIVATE just to further avoid accidentally
// modifying the file.
void* pages = ::mmap(
nullptr,
map_size,
PROT_READ,
MAP_PRIVATE,
fd_,
static_cast<off_t>(range.start));
const auto map_offset = get_mmap_offset(range.start);

void* pages =
::mmap(nullptr, map_size, PROT_READ, MAP_PRIVATE, fd_, map_offset);
ET_CHECK_OR_RETURN_ERROR(
pages != MAP_FAILED,
AccessFailed,
Expand Down
60 changes: 59 additions & 1 deletion extension/data_loader/test/mmap_data_loader_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <executorch/extension/data_loader/mmap_data_loader.h>

#include <cstring>
#include <vector>

#include <gtest/gtest.h>

Expand Down Expand Up @@ -428,4 +429,61 @@ TEST_F(MmapDataLoaderTest, LoadIntoCopiesOffsetCorrectly) {

// Verify memory copied correctly.
EXPECT_EQ(0, std::memcmp(dst, contents + offset, size));
}
}

// Tests that the loader can handle files requiring 64-bit file systems.
// This test verifies that offsets and sizes beyond 32-bit limits are handled
// correctly by creating a sparse file with data at a large offset.
TEST_F(MmapDataLoaderTest, LargeFileOffsetSupport) {
  // Some Linux test configurations are 32-bit. When off_t is narrower than
  // 64 bits it cannot represent the offset used below, so skip the test
  // there. The comparison must be strict: a 64-bit off_t is exactly 8 bytes,
  // and `<= 8` would skip the test on every non-Windows platform.
#ifndef _WIN32
  if (sizeof(off_t) < 8) {
    return;
  }
#endif
  // Create a sparse file with a marker at an offset beyond 2GB (32-bit limit).
  // We use 3GB to ensure we're testing 64-bit offset handling.
  const size_t large_offset = 3ULL * 1024 * 1024 * 1024; // 3GB
  const std::string test_marker = "TEST_MARKER_AT_LARGE_OFFSET";

  // Use TempFile sparse file API to create a 3GB+ file.
  TempFile tf(large_offset, test_marker, large_offset + test_marker.size());

  // Now try to load the data using MmapDataLoader.
  Result<MmapDataLoader> mdl = MmapDataLoader::from(tf.path().c_str());
  ASSERT_EQ(mdl.error(), Error::Ok)
      << "Failed to create MmapDataLoader for large sparse file";

  // Verify the file size is reported correctly (should be > 3GB).
  Result<size_t> file_size = mdl->size();
  ASSERT_EQ(file_size.error(), Error::Ok);
  EXPECT_GT(*file_size, large_offset)
      << "File size should be larger than the large offset";
  EXPECT_EQ(*file_size, large_offset + test_marker.size())
      << "File size should match offset + marker size";

  // Try to load the marker data from the large offset.
  Result<FreeableBuffer> fb = mdl->load(
      large_offset,
      test_marker.size(),
      DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program));
  ASSERT_EQ(fb.error(), Error::Ok) << "Failed to load data from large offset";

  EXPECT_EQ(fb->size(), test_marker.size());
  EXPECT_EQ(0, std::memcmp(fb->data(), test_marker.data(), test_marker.size()))
      << "Data at large offset does not match expected marker";

  // Test load_into as well.
  std::vector<uint8_t> buffer(test_marker.size());
  Error err = mdl->load_into(
      large_offset,
      test_marker.size(),
      DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program),
      buffer.data());
  ASSERT_EQ(err, Error::Ok) << "load_into failed for large offset";

  EXPECT_EQ(
      0, std::memcmp(buffer.data(), test_marker.data(), test_marker.size()))
      << "load_into data at large offset does not match expected marker";
}
Loading
Loading