Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,11 @@ def test_writer_total_samples_after_close_returns_zero(self):
class TestBinaryFormatValidation(BinaryFormatTestBase):
"""Tests for malformed binary files."""

# Byte offsets into the binary file header that the tests in this class
# patch in place to produce malformed files.
# Sample-count field; patched with struct format "=I" (4-byte unsigned).
HDR_OFF_SAMPLES = 28
# NOTE(review): presumably the declared thread-count field - not patched by
# the tests visible here; confirm against the binary format definition.
HDR_OFF_THREADS = 32
# String-table offset field; patched with struct format "=Q" (8-byte unsigned).
HDR_OFF_STR_TABLE = 36
# Frame-table offset field; patched with struct format "=Q" (8-byte unsigned).
HDR_OFF_FRAME_TABLE = 44
# NOTE(review): presumably the size of the fixed file header, i.e. the offset
# at which sample data starts - confirm against the writer implementation.
FILE_HEADER_PLACEHOLDER_SIZE = 64

def test_replay_rejects_more_threads_than_declared(self):
"""Replay rejects files with more unique threads than the header declares."""
Expand All @@ -1000,6 +1004,43 @@ def test_replay_rejects_more_threads_than_declared(self):
"threads than declared in header (declared 1, found at least 2)",
)

def test_replay_rejects_sample_count_mismatch(self):
    """Replay must fail when the header's sample count is not what was decoded.

    A file holding a single sample is written, then the header's sample
    count is overwritten with 2.  Reading the header still reports 2, but
    replaying the samples has to raise ``ValueError``.
    """
    frame = make_frame("sample.py", 10, "sample")
    thread = make_thread(1, [frame])
    interpreter = make_interpreter(0, [thread])
    filename = self.create_binary_file([[interpreter]], compression="none")

    # Corrupt only the declared sample count; the sample data is untouched.
    declared_count = struct.pack("=I", 2)
    with open(filename, "r+b") as header_file:
        header_file.seek(self.HDR_OFF_SAMPLES)
        header_file.write(declared_count)

    expected_message = (
        "Sample count mismatch: header declares 2 samples "
        "but replay decoded 1"
    )
    with BinaryReader(filename) as reader:
        info = reader.get_info()
        self.assertEqual(info["sample_count"], 2)
        with self.assertRaises(ValueError) as cm:
            reader.replay_samples(RawCollector())
        self.assertEqual(str(cm.exception), expected_message)

def test_replay_rejects_trailing_partial_sample_header(self):
    """Replay must raise on leftover sample bytes rather than stop quietly.

    An empty file is written, then the string-table and frame-table
    offsets in the header are both moved to one byte past
    ``FILE_HEADER_PLACEHOLDER_SIZE``.  Replay is expected to report that
    single stray byte as truncated sample data.
    """
    filename = self.create_binary_file([], compression="none")
    sample_data_end = self.FILE_HEADER_PLACEHOLDER_SIZE + 1
    packed_end = struct.pack("=Q", sample_data_end)

    # Both table offsets get the same value; string table first, then
    # frame table, matching the order the fields are patched in.
    offsets_to_patch = (self.HDR_OFF_STR_TABLE, self.HDR_OFF_FRAME_TABLE)
    with open(filename, "r+b") as header_file:
        for field_offset in offsets_to_patch:
            header_file.seek(field_offset)
            header_file.write(packed_end)

    with BinaryReader(filename) as reader:
        with self.assertRaises(ValueError) as cm:
            reader.replay_samples(RawCollector())
        message = str(cm.exception)
        self.assertEqual(message, "Truncated sample data: 1 trailing bytes")


class TestBinaryEncodings(BinaryFormatTestBase):
"""Tests specifically targeting different stack encodings."""
Expand Down
2 changes: 1 addition & 1 deletion Modules/_remote_debugging/_remote_debugging.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ typedef enum _WIN32_THREADSTATE {
#define set_exception_cause(unwinder, exc_type, message) \
do { \
assert(PyErr_Occurred() && "function returned -1 without setting exception"); \
if (unwinder->debug) { \
if (unwinder->debug && !_Py_RemoteDebug_HasPermissionError()) { \
_set_debug_exception_cause(exc_type, message); \
} \
} while (0)
Expand Down
41 changes: 23 additions & 18 deletions Modules/_remote_debugging/asyncio.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,35 +22,38 @@ _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle)
address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio",
NULL);
if (address == 0) {
// Error out: 'python' substring covers both executable and DLL
PyObject *exc = PyErr_GetRaisedException();
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
_PyErr_ChainExceptions1(exc);
if (!_Py_RemoteDebug_HasPermissionError()) {
PyObject *exc = PyErr_GetRaisedException();
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
_PyErr_ChainExceptions1(exc);
}
}
#elif defined(__linux__) && HAVE_PROCESS_VM_READV
// On Linux, search for asyncio debug in executable or DLL
address = search_linux_map_for_section(handle, "AsyncioDebug", "python",
NULL);
if (address == 0) {
// Error out: 'python' substring covers both executable and DLL
PyObject *exc = PyErr_GetRaisedException();
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
_PyErr_ChainExceptions1(exc);
if (!_Py_RemoteDebug_HasPermissionError()) {
PyObject *exc = PyErr_GetRaisedException();
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
_PyErr_ChainExceptions1(exc);
}
}
#elif defined(__APPLE__) && TARGET_OS_OSX
// On macOS, try libpython first, then fall back to python
address = search_map_for_section(handle, "AsyncioDebug", "libpython",
NULL);
if (address == 0) {
if (address == 0 && !_Py_RemoteDebug_HasPermissionError()) {
PyErr_Clear();
address = search_map_for_section(handle, "AsyncioDebug", "python",
NULL);
}
if (address == 0) {
// Error out: 'python' substring covers both executable and DLL
PyObject *exc = PyErr_GetRaisedException();
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
_PyErr_ChainExceptions1(exc);
if (!_Py_RemoteDebug_HasPermissionError()) {
PyObject *exc = PyErr_GetRaisedException();
PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process.");
_PyErr_ChainExceptions1(exc);
}
}
#else
Py_UNREACHABLE();
Expand Down Expand Up @@ -96,10 +99,12 @@ ensure_async_debug_offsets(RemoteUnwinderObject *unwinder)
return -1;
}
if (result < 0) {
PyErr_Clear();
PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available");
set_exception_cause(unwinder, PyExc_RuntimeError,
"AsyncioDebug section unavailable - asyncio module may not be loaded in target process");
if (!_Py_RemoteDebug_HasPermissionError()) {
PyErr_Clear();
PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available");
set_exception_cause(unwinder, PyExc_RuntimeError,
"AsyncioDebug section unavailable - asyncio module may not be loaded in target process");
}
return -1;
}

Expand Down Expand Up @@ -218,7 +223,7 @@ parse_task_name(

if ((GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_LONG_SUBCLASS)) {
long res = read_py_long(unwinder, task_name_addr);
if (res == -1) {
if (res == -1 && PyErr_Occurred()) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Task name PyLong parsing failed");
return NULL;
}
Expand Down
72 changes: 67 additions & 5 deletions Modules/_remote_debugging/binary_io_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,22 @@ binary_reader_open(PyObject *path)
Py_fclose(fp);
goto error;
}
if (st.st_size < 0) {
PyErr_SetString(PyExc_IOError, "Invalid negative file size");
Py_fclose(fp);
goto error;
}
if ((uintmax_t)st.st_size > SIZE_MAX) {
PyErr_SetString(PyExc_OverflowError, "File is too large to map");
Py_fclose(fp);
goto error;
}
reader->mapped_size = st.st_size;
if (reader->mapped_size == 0) {
PyErr_SetString(PyExc_ValueError, "File too small for header");
Py_fclose(fp);
goto error;
}

/* Map the file into memory.
* MAP_POPULATE (Linux-only) pre-faults all pages at mmap time, which:
Expand Down Expand Up @@ -424,7 +439,10 @@ binary_reader_open(PyObject *path)
}
#endif

(void)Py_fclose(fp);
if (Py_fclose(fp) != 0) {
PyErr_SetFromErrno(PyExc_IOError);
goto error;
}

uint8_t *data = reader->mapped_data;
size_t file_size = reader->mapped_size;
Expand All @@ -444,7 +462,15 @@ binary_reader_open(PyObject *path)
PyErr_SetFromErrno(PyExc_IOError);
goto error;
}
if ((uint64_t)file_size_off > SIZE_MAX) {
PyErr_SetString(PyExc_OverflowError, "File is too large to read");
goto error;
}
reader->file_size = (size_t)file_size_off;
if (reader->file_size == 0) {
PyErr_SetString(PyExc_ValueError, "File too small for header");
goto error;
}
if (FSEEK64(reader->fp, 0, SEEK_SET) != 0) {
PyErr_SetFromErrno(PyExc_IOError);
goto error;
Expand All @@ -456,8 +482,18 @@ binary_reader_open(PyObject *path)
goto error;
}

if (fread(reader->file_data, 1, reader->file_size, reader->fp) != reader->file_size) {
PyErr_SetFromErrno(PyExc_IOError);
size_t nread = fread(reader->file_data, 1, reader->file_size, reader->fp);
if (nread != reader->file_size) {
int err = errno;
if (ferror(reader->fp) && err != 0) {
errno = err;
PyErr_SetFromErrno(PyExc_IOError);
}
else {
PyErr_Format(PyExc_ValueError,
"Unexpected end of file: read %zu of %zu bytes",
nread, reader->file_size);
}
goto error;
}

Expand Down Expand Up @@ -944,10 +980,16 @@ invoke_progress_callback(PyObject *callback, Py_ssize_t current, uint32_t total)
Py_ssize_t
binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progress_callback)
{
if (!PyObject_HasAttrString(collector, "collect")) {
PyObject *collect_method;
int has_collect = PyObject_GetOptionalAttrString(collector, "collect", &collect_method);
if (has_collect < 0) {
return -1;
}
if (has_collect == 0) {
PyErr_SetString(PyExc_TypeError, "Collector must have a collect() method");
return -1;
}
Py_DECREF(collect_method);

/* Get module state for struct sequence types */
PyObject *module = PyImport_ImportModule("_remote_debugging");
Expand All @@ -973,7 +1015,10 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
while (offset < reader->sample_data_size) {
/* Read thread_id (8 bytes) + interpreter_id (4 bytes) + encoding byte */
if (reader->sample_data_size - offset < SAMPLE_HEADER_FIXED_SIZE) {
break; /* End of data */
PyErr_Format(PyExc_ValueError,
"Truncated sample data: %zu trailing bytes",
reader->sample_data_size - offset);
return -1;
}

/* Use memcpy to avoid strict aliasing violations, then byte-swap if needed */
Expand Down Expand Up @@ -1019,6 +1064,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
count, max_possible_samples);
return -1;
}
if ((uint64_t)count > (uint64_t)PY_SSIZE_T_MAX - (uint64_t)replayed) {
PyErr_SetString(PyExc_OverflowError,
"Sample count exceeds Py_ssize_t maximum");
return -1;
}

reader->stats.repeat_records++;
reader->stats.repeat_samples += count;
Expand Down Expand Up @@ -1149,6 +1199,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
return -1;
}
Py_DECREF(timestamps_list);
if (replayed == PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"Sample count exceeds Py_ssize_t maximum");
return -1;
}
replayed++;
reader->stats.total_samples++;
break;
Expand All @@ -1167,6 +1222,13 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
}
}

if ((uint64_t)replayed != reader->sample_count) {
PyErr_Format(PyExc_ValueError,
"Sample count mismatch: header declares %u samples but replay decoded %zd",
reader->sample_count, replayed);
return -1;
}

/* Final progress callback at 100% */
if (invoke_progress_callback(progress_callback, replayed, reader->sample_count) < 0) {
return -1;
Expand Down
42 changes: 41 additions & 1 deletion Modules/_remote_debugging/binary_io_writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,15 @@ fwrite_checked_allow_threads(const void *data, size_t size, FILE *fp)
written = fwrite(data, 1, size, fp);
Py_END_ALLOW_THREADS
if (written != size) {
PyErr_SetFromErrno(PyExc_IOError);
int err = errno;
if (ferror(fp) && err != 0) {
errno = err;
PyErr_SetFromErrno(PyExc_IOError);
}
else {
PyErr_Format(PyExc_IOError,
"short write: wrote %zu of %zu bytes", written, size);
}
return -1;
}
return 0;
Expand Down Expand Up @@ -366,6 +374,11 @@ writer_intern_string(BinaryWriter *writer, PyObject *string, uint32_t *index)
return 0;
}

if (writer->string_count >= UINT32_MAX) {
PyErr_SetString(PyExc_OverflowError,
"too many strings for binary format");
return -1;
}
if (writer->string_count >= writer->string_capacity) {
if (grow_parallel_arrays((void **)&writer->strings,
(void **)&writer->string_lengths,
Expand All @@ -380,6 +393,12 @@ writer_intern_string(BinaryWriter *writer, PyObject *string, uint32_t *index)
if (!str_data) {
return -1;
}
if (str_len > (Py_ssize_t)UINT32_MAX) {
PyErr_Format(PyExc_OverflowError,
"string length %zd exceeds binary format maximum %u",
str_len, UINT32_MAX);
return -1;
}

char *str_copy = PyMem_Malloc(str_len + 1);
if (!str_copy) {
Expand Down Expand Up @@ -422,6 +441,11 @@ writer_intern_frame(BinaryWriter *writer, const FrameEntry *entry, uint32_t *ind
return 0;
}

if (writer->frame_count >= UINT32_MAX) {
PyErr_SetString(PyExc_OverflowError,
"too many frames for binary format");
return -1;
}
if (GROW_ARRAY(writer->frame_entries, writer->frame_count,
writer->frame_capacity, FrameEntry) < 0) {
return -1;
Expand Down Expand Up @@ -466,6 +490,11 @@ writer_get_or_create_thread_entry(BinaryWriter *writer, uint64_t thread_id,
}
}

if (writer->thread_count >= UINT32_MAX) {
PyErr_SetString(PyExc_OverflowError,
"too many threads for binary format");
return NULL;
}
if (writer->thread_count >= writer->thread_capacity) {
ThreadEntry *new_entries = grow_array(writer->thread_entries,
&writer->thread_capacity,
Expand Down Expand Up @@ -600,6 +629,11 @@ flush_pending_rle(BinaryWriter *writer, ThreadEntry *entry)
if (!entry->has_pending_rle || entry->pending_rle_count == 0) {
return 0;
}
if (entry->pending_rle_count > UINT32_MAX - writer->total_samples) {
PyErr_SetString(PyExc_OverflowError,
"too many samples for binary format");
return -1;
}

/* Write RLE record:
* [thread_id: 8] [interpreter_id: 4] [STACK_REPEAT: 1] [count: varint]
Expand Down Expand Up @@ -644,6 +678,12 @@ write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry,
const uint32_t *frame_indices, size_t stack_depth,
size_t shared_count, size_t pop_count, size_t push_count)
{
if (writer->total_samples == UINT32_MAX) {
PyErr_SetString(PyExc_OverflowError,
"too many samples for binary format");
return -1;
}

/* Header: thread_id(8) + interpreter_id(4) + encoding(1) + delta(varint) + status(1) */
uint8_t header_buf[SAMPLE_HEADER_MAX_SIZE];
memcpy(header_buf + SMP_OFF_THREAD_ID, &entry->thread_id, SMP_SIZE_THREAD_ID);
Expand Down
2 changes: 0 additions & 2 deletions Modules/_remote_debugging/code_objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t

// Read the TLBC array pointer
if (read_ptr(unwinder, tlbc_array_addr, &tlbc_array_ptr) != 0) {
PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array pointer");
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array pointer");
return 0; // Read error
}
Expand All @@ -61,7 +60,6 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t
// Read the TLBC array size
Py_ssize_t tlbc_size;
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0) {
PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array size");
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array size");
return 0; // Read error
}
Expand Down
3 changes: 3 additions & 0 deletions Modules/_remote_debugging/module.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,9 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
return -1;
}
if (async_debug_result < 0) {
if (_Py_RemoteDebug_HasPermissionError()) {
return -1;
}
PyErr_Clear();
memset(&self->async_debug_offsets, 0, sizeof(self->async_debug_offsets));
self->async_debug_offsets_available = 0;
Expand Down
Loading
Loading