Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion Modules/_remote_debugging/binary_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,36 @@ extern "C" {
#define HDR_SIZE_COMPRESSION 4
#define FILE_HEADER_SIZE (HDR_OFF_COMPRESSION + HDR_SIZE_COMPRESSION)
/* Must be at least FILE_HEADER_SIZE; checked by the static_assert below. */
#define FILE_HEADER_PLACEHOLDER_SIZE 64

/* SAMPLE_HEADER_FIXED_SIZE is not defined here: it is derived from the
 * SMP_OFF_xxx and SMP_SIZE_xxx sample header layout macros below. Defining
 * it a second time with a different replacement list would be an
 * incompatible macro redefinition. */

static_assert(FILE_HEADER_SIZE <= FILE_HEADER_PLACEHOLDER_SIZE,
              "FILE_HEADER_SIZE exceeds FILE_HEADER_PLACEHOLDER_SIZE");

/* Sample header layout.
 *
 * Fields are packed back to back: each offset is the previous field's
 * offset plus the previous field's size.
 *
 *   field            type
 *   thread_id        uint64_t
 *   interpreter_id   uint32_t
 *   encoding         uint8_t
 */
#define SMP_SIZE_THREAD_ID       (sizeof(uint64_t))
#define SMP_SIZE_INTERPRETER_ID  (sizeof(uint32_t))
#define SMP_SIZE_ENCODING        (sizeof(uint8_t))

#define SMP_OFF_THREAD_ID        0
#define SMP_OFF_INTERPRETER_ID   (SMP_OFF_THREAD_ID + SMP_SIZE_THREAD_ID)
#define SMP_OFF_ENCODING         (SMP_OFF_INTERPRETER_ID + SMP_SIZE_INTERPRETER_ID)

/* Total size of the fixed part of a sample header (end of the last field). */
#define SAMPLE_HEADER_FIXED_SIZE (SMP_OFF_ENCODING + SMP_SIZE_ENCODING)

/* Fail the build if the fixed sample header size ever drifts from 13 bytes. */
static_assert(13 == SAMPLE_HEADER_FIXED_SIZE,
              "SAMPLE_HEADER_FIXED_SIZE must remain 13");

/* File footer layout.
 *
 * Fields are packed back to back: each offset is the previous field's
 * offset plus the previous field's size.
 *
 *   field       size
 *   strings     sizeof(uint32_t)
 *   frames      sizeof(uint32_t)
 *   file_size   sizeof(uint64_t)
 *   checksum    2 * sizeof(uint64_t)
 */
#define FTR_SIZE_STRINGS    (sizeof(uint32_t))
#define FTR_SIZE_FRAMES     (sizeof(uint32_t))
#define FTR_SIZE_FILE_SIZE  (sizeof(uint64_t))
#define FTR_SIZE_CHECKSUM   (2 * sizeof(uint64_t))

#define FTR_OFF_STRINGS     0
#define FTR_OFF_FRAMES      (FTR_OFF_STRINGS + FTR_SIZE_STRINGS)
#define FTR_OFF_FILE_SIZE   (FTR_OFF_FRAMES + FTR_SIZE_FRAMES)
#define FTR_OFF_CHECKSUM    (FTR_OFF_FILE_SIZE + FTR_SIZE_FILE_SIZE)

/* Total footer size (end of the last field). */
#define FILE_FOOTER_SIZE    (FTR_OFF_CHECKSUM + FTR_SIZE_CHECKSUM)

/* Fail the build if the footer size ever drifts from 32 bytes. */
static_assert(32 == FILE_FOOTER_SIZE,
              "FILE_FOOTER_SIZE must remain 32");

/* Buffer sizes: 512KB (512 * 1024 bytes) balances syscall amortization
 * against memory use, and aligns well with filesystem block sizes and
 * zstd dictionary windows. */
#define WRITE_BUFFER_SIZE (512 * 1024)
Expand Down
20 changes: 8 additions & 12 deletions Modules/_remote_debugging/binary_io_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,11 @@
* ============================================================================ */

/* File structure sizes.
 * FILE_FOOTER_SIZE is not defined here: binary_io.h derives it from the
 * FTR_OFF_xxx and FTR_SIZE_xxx footer layout macros that this file already
 * uses. Repeating it as a bare literal would be an incompatible
 * redefinition of that macro. */
#define MIN_DECOMPRESS_BUFFER_SIZE (64 * 1024) /* Minimum decompression buffer */

/* Progress callback frequency */
#define PROGRESS_CALLBACK_INTERVAL 1000

/* Maximum decompression size limit (1GB) */
#define MAX_DECOMPRESS_SIZE (1ULL << 30)

/* ============================================================================
* BINARY READER IMPLEMENTATION
* ============================================================================ */
Expand All @@ -47,8 +43,8 @@ reader_parse_header(BinaryReader *reader, const uint8_t *data, size_t file_size)
/* Use memcpy to avoid strict aliasing violations and unaligned access */
uint32_t magic;
uint32_t version;
memcpy(&magic, &data[0], sizeof(magic));
memcpy(&version, &data[4], sizeof(version));
memcpy(&magic, &data[HDR_OFF_MAGIC], HDR_SIZE_MAGIC);
memcpy(&version, &data[HDR_OFF_VERSION], HDR_SIZE_VERSION);

/* Detect endianness from magic number */
if (magic == BINARY_FORMAT_MAGIC) {
Expand Down Expand Up @@ -119,8 +115,8 @@ reader_parse_footer(BinaryReader *reader, const uint8_t *data, size_t file_size)
const uint8_t *footer = data + file_size - FILE_FOOTER_SIZE;
/* Use memcpy to avoid strict aliasing violations */
uint32_t strings_count, frames_count;
memcpy(&strings_count, &footer[0], sizeof(strings_count));
memcpy(&frames_count, &footer[4], sizeof(frames_count));
memcpy(&strings_count, &footer[FTR_OFF_STRINGS], FTR_SIZE_STRINGS);
memcpy(&frames_count, &footer[FTR_OFF_FRAMES], FTR_SIZE_FRAMES);

reader->strings_count = SWAP32_IF(reader->needs_swap, strings_count);
reader->frames_count = SWAP32_IF(reader->needs_swap, frames_count);
Expand Down Expand Up @@ -984,11 +980,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
/* Use memcpy to avoid strict aliasing violations, then byte-swap if needed */
uint64_t thread_id_raw;
uint32_t interpreter_id_raw;
memcpy(&thread_id_raw, &reader->sample_data[offset], sizeof(thread_id_raw));
offset += 8;
memcpy(&thread_id_raw, &reader->sample_data[offset], SMP_SIZE_THREAD_ID);
offset += SMP_SIZE_THREAD_ID;

memcpy(&interpreter_id_raw, &reader->sample_data[offset], sizeof(interpreter_id_raw));
offset += 4;
memcpy(&interpreter_id_raw, &reader->sample_data[offset], SMP_SIZE_INTERPRETER_ID);
offset += SMP_SIZE_INTERPRETER_ID;

uint64_t thread_id = SWAP64_IF(reader->needs_swap, thread_id_raw);
uint32_t interpreter_id = SWAP32_IF(reader->needs_swap, interpreter_id_raw);
Expand Down
29 changes: 13 additions & 16 deletions Modules/_remote_debugging/binary_io_writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@
/* Frame buffer: depth varint (max 2 bytes for 256) + 256 frames * 5 bytes/varint + margin */
#define MAX_FRAME_BUFFER_SIZE ((MAX_STACK_DEPTH * MAX_VARINT_SIZE_U32) + MAX_VARINT_SIZE_U32 + 16)

/* File structure sizes: FILE_FOOTER_SIZE is not defined here. binary_io.h
 * derives it from the footer layout macros, and repeating it as a bare
 * literal would be an incompatible redefinition of that macro. */

/* Helper macro: convert PyLong to int32, using default_val if conversion fails */
#define PYLONG_TO_INT32_OR_DEFAULT(obj, var, default_val) \
do { \
Expand Down Expand Up @@ -588,9 +585,9 @@ static inline int
write_sample_header(BinaryWriter *writer, ThreadEntry *entry, uint8_t encoding)
{
    /* Assemble the fixed-size part of a sample header and pass it to
     * writer_write_bytes(), whose result is returned unchanged.
     *
     * Layout (see the SMP_OFF_xxx / SMP_SIZE_xxx macros):
     *   thread_id | interpreter_id | encoding
     *
     * thread_id and interpreter_id are copied with memcpy exactly as they
     * are stored in `entry`; no byte swapping is done here. Each field is
     * written once, using the layout macros rather than literal offsets. */
    uint8_t header[SAMPLE_HEADER_FIXED_SIZE];

    memcpy(header + SMP_OFF_THREAD_ID, &entry->thread_id, SMP_SIZE_THREAD_ID);
    memcpy(header + SMP_OFF_INTERPRETER_ID, &entry->interpreter_id, SMP_SIZE_INTERPRETER_ID);
    header[SMP_OFF_ENCODING] = encoding;

    return writer_write_bytes(writer, header, SAMPLE_HEADER_FIXED_SIZE);
}

Expand Down Expand Up @@ -649,9 +646,9 @@ write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry,
{
/* Header: thread_id(8) + interpreter_id(4) + encoding(1) + delta(varint) + status(1) */
uint8_t header_buf[SAMPLE_HEADER_MAX_SIZE];
memcpy(header_buf, &entry->thread_id, 8);
memcpy(header_buf + 8, &entry->interpreter_id, 4);
header_buf[12] = (uint8_t)encoding_type;
memcpy(header_buf + SMP_OFF_THREAD_ID, &entry->thread_id, SMP_SIZE_THREAD_ID);
memcpy(header_buf + SMP_OFF_INTERPRETER_ID, &entry->interpreter_id, SMP_SIZE_INTERPRETER_ID);
header_buf[SMP_OFF_ENCODING] = (uint8_t)encoding_type;
size_t varint_len = encode_varint_u64(
header_buf + SAMPLE_HEADER_FIXED_SIZE,
timestamp_delta);
Expand Down Expand Up @@ -1145,17 +1142,17 @@ binary_writer_finalize(BinaryWriter *writer)
PyErr_SetFromErrno(PyExc_IOError);
return -1;
}
uint64_t file_size = (uint64_t)footer_offset + 32;
uint8_t footer[32] = {0};
uint64_t file_size = (uint64_t)footer_offset + FILE_FOOTER_SIZE;
uint8_t footer[FILE_FOOTER_SIZE] = {0};
/* Cast size_t to uint32_t before memcpy to ensure correct bytes are copied
* on both little-endian and big-endian systems (size_t is 8 bytes on 64-bit) */
uint32_t string_count_u32 = (uint32_t)writer->string_count;
uint32_t frame_count_u32 = (uint32_t)writer->frame_count;
memcpy(footer + 0, &string_count_u32, 4);
memcpy(footer + 4, &frame_count_u32, 4);
memcpy(footer + 8, &file_size, 8);
/* bytes 16-31: checksum placeholder (zeros) */
if (fwrite_checked_allow_threads(footer, 32, writer->fp) < 0) {
memcpy(footer + FTR_OFF_STRINGS, &string_count_u32, FTR_SIZE_STRINGS);
memcpy(footer + FTR_OFF_FRAMES, &frame_count_u32, FTR_SIZE_FRAMES);
memcpy(footer + FTR_OFF_FILE_SIZE, &file_size, FTR_SIZE_FILE_SIZE);
/* checksum (FTR_OFF_CHECKSUM..FILE_FOOTER_SIZE-1): placeholder zeros */
if (fwrite_checked_allow_threads(footer, FILE_FOOTER_SIZE, writer->fp) < 0) {
return -1;
}

Expand Down
Loading