-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[msan] Mark allocator padding as uninitialized, with new origin tag #157187
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This is follow-up work per discussion in llvm#155944 (comment). If the allocator reserves more space than the user requested (e.g., malloc(7) actually has 16 bytes reserved), the padding bytes will now be marked as uninitialized; the origin will be set as a new tag, ALLOC_PADDING (in the case of ambiguity caused by origin granularity, ALLOC will take precedence). Padding poisoning is controlled by the existing flag poison_in_malloc and a new flag, poison_in_calloc.
@llvm/pr-subscribers-compiler-rt-sanitizer Author: Thurston Dang (thurstond) ChangesThis is follow-up work per discussion in #155944 (comment). If the allocator reserves more space than the user requested (e.g., malloc(7) actually has 16 bytes reserved), the padding bytes will now be marked as uninitialized; the origin will be set as a new tag, ALLOC_PADDING (in the case of ambiguity caused by origin granularity, ALLOC will take precedence). Padding poisoning is controlled by the existing flag poison_in_malloc and a new flag, poison_in_calloc. Full diff: https://github.com/llvm/llvm-project/pull/157187.diff 6 Files Affected:
diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h
index 7fb58be67a02c..edb26997be07d 100644
--- a/compiler-rt/lib/msan/msan.h
+++ b/compiler-rt/lib/msan/msan.h
@@ -303,6 +303,7 @@ u32 ChainOrigin(u32 id, StackTrace *stack);
const int STACK_TRACE_TAG_POISON = StackTrace::TAG_CUSTOM + 1;
const int STACK_TRACE_TAG_FIELDS = STACK_TRACE_TAG_POISON + 1;
const int STACK_TRACE_TAG_VPTR = STACK_TRACE_TAG_FIELDS + 1;
+const int STACK_TRACE_TAG_ALLOC_PADDING = STACK_TRACE_TAG_VPTR + 1;
#define GET_MALLOC_STACK_TRACE \
UNINITIALIZED BufferedStackTrace stack; \
diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp
index 64df863839c06..1ee685547de54 100644
--- a/compiler-rt/lib/msan/msan_allocator.cpp
+++ b/compiler-rt/lib/msan/msan_allocator.cpp
@@ -217,13 +217,35 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment,
}
auto *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated));
meta->requested_size = size;
+ uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated);
+ void* padding_start =
+ reinterpret_cast<void*>(reinterpret_cast<uptr>(allocated) + size);
+ uptr padding_size = actually_allocated_size - size;
+
+ // Origins have 4-byte granularity. Set the TAG_ALLOC_PADDING origin first,
+ // so the TAG_ALLOC origin will take precedence if necessary e.g.,
+ // - if we have malloc(7) that actually takes up 16 bytes, bytes 0-7 will
+ // have TAG_ALLOC and bytes 8-15 will have TAG_ALLOC_PADDING.
+ // - with calloc, bytes 4-15 will have TAG_ALLOC_PADDING (but bytes 4-6 are
+ // initialized, hence the origin is unused). Note that this means, unlike
+ // with malloc, byte 7 is TAG_ALLOC_PADDING instead of TAG_ALLOC.
+ if (__msan_get_track_origins() && ((zero && flags()->poison_in_calloc) ||
+ (!zero && flags()->poison_in_malloc))) {
+ stack->tag = STACK_TRACE_TAG_ALLOC_PADDING;
+ Origin o2 = Origin::CreateHeapOrigin(stack);
+ __msan_set_origin(padding_start, padding_size, o2.raw_id());
+ }
+
if (zero) {
if (allocator.FromPrimary(allocated))
__msan_clear_and_unpoison(allocated, size);
else
__msan_unpoison(allocated, size); // Mem is already zeroed.
+
+ if (flags()->poison_in_calloc)
+ __msan_poison(padding_start, padding_size);
} else if (flags()->poison_in_malloc) {
- __msan_poison(allocated, size);
+ __msan_poison(allocated, actually_allocated_size);
if (__msan_get_track_origins()) {
stack->tag = StackTrace::TAG_ALLOC;
Origin o = Origin::CreateHeapOrigin(stack);
@@ -231,11 +253,6 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment,
}
}
- uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated);
- // For compatibility, the allocator converted 0-sized allocations into 1 byte
- if (size == 0 && actually_allocated_size > 0 && flags()->poison_in_malloc)
- __msan_poison(allocated, 1);
-
UnpoisonParam(2);
RunMallocHooks(allocated, size);
return allocated;
@@ -247,7 +264,7 @@ void __msan::MsanDeallocate(BufferedStackTrace *stack, void *p) {
RunFreeHooks(p);
Metadata *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(p));
- uptr size = meta->requested_size;
+ uptr size = allocator.GetActuallyAllocatedSize(p);
meta->requested_size = 0;
// This memory will not be reused by anyone else, so we are free to keep it
// poisoned. The secondary allocator will unmap and unpoison by
diff --git a/compiler-rt/lib/msan/msan_flags.inc b/compiler-rt/lib/msan/msan_flags.inc
index 16db26bd42ed9..2d600e79e2337 100644
--- a/compiler-rt/lib/msan/msan_flags.inc
+++ b/compiler-rt/lib/msan/msan_flags.inc
@@ -22,6 +22,7 @@ MSAN_FLAG(int, origin_history_size, Origin::kMaxDepth, "")
MSAN_FLAG(int, origin_history_per_stack_limit, 20000, "")
MSAN_FLAG(bool, poison_heap_with_zeroes, false, "")
MSAN_FLAG(bool, poison_stack_with_zeroes, false, "")
+MSAN_FLAG(bool, poison_in_calloc, true, "")
MSAN_FLAG(bool, poison_in_malloc, true, "")
MSAN_FLAG(bool, poison_in_free, true, "")
MSAN_FLAG(bool, poison_in_dtor, true, "")
diff --git a/compiler-rt/lib/msan/msan_report.cpp b/compiler-rt/lib/msan/msan_report.cpp
index 99bf81f66dc9e..f19e20a11efc5 100644
--- a/compiler-rt/lib/msan/msan_report.cpp
+++ b/compiler-rt/lib/msan/msan_report.cpp
@@ -90,6 +90,11 @@ static void DescribeOrigin(u32 id) {
Printf(" %sVirtual table ptr was destroyed%s\n", d.Origin(),
d.Default());
break;
+ case STACK_TRACE_TAG_ALLOC_PADDING:
+ Printf(
+ " %sUninitialized value was created by heap allocator padding%s\n",
+ d.Origin(), d.Default());
+ break;
default:
Printf(" %sUninitialized value was created%s\n", d.Origin(),
d.Default());
diff --git a/compiler-rt/test/msan/allocator_padding.cpp b/compiler-rt/test/msan/allocator_padding.cpp
new file mode 100644
index 0000000000000..a5db28a463297
--- /dev/null
+++ b/compiler-rt/test/msan/allocator_padding.cpp
@@ -0,0 +1,59 @@
+// malloc: all bytes are uninitialized
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 0 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 6 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// This test assumes the allocator allocates 16 bytes for malloc(7). Bytes
+// 7-15 are padding.
+// Edge case: when the origin granularity spans both ALLOC and ALLOC_PADDING,
+// ALLOC takes precedence
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 7 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// Bytes 8-15 are tagged as ALLOC_PADDING.
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 8 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 15 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+
+// calloc
+// Bytes 0-6 are fully initialized, so no MSan report should happen.
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1
+//
+// Byte 7 is uninitialized. Unlike malloc, this is tagged as ALLOC_PADDING
+// (since the origin does not need to track bytes 4-6).
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+//
+// As with malloc, Bytes 8-15 are tagged as ALLOC_PADDING.
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+#ifdef USE_CALLOC
+ char *p = (char *)calloc(7, 1);
+#else
+ char *p = (char *)malloc(7);
+#endif
+
+ if (argc == 2) {
+ int index = atoi(argv[1]);
+
+ printf("p[%d] = %d\n", index, p[index]);
+ // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
+ // CHECK: {{#0 0x.* in main .*allocator_padding.cpp:}}[[@LINE-2]]
+ // ORIGIN-ALLOC: Uninitialized value was created by a heap allocation
+ // ORIGIN-ALLOC-PADDING: Uninitialized value was created by heap allocator padding
+ free(p);
+ }
+
+ return 0;
+}
diff --git a/compiler-rt/test/msan/zero_alloc.cpp b/compiler-rt/test/msan/zero_alloc.cpp
index 1451e1e89e9fb..f5d8d11c71697 100644
--- a/compiler-rt/test/msan/zero_alloc.cpp
+++ b/compiler-rt/test/msan/zero_alloc.cpp
@@ -1,4 +1,7 @@
-// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGINS
#include <stdio.h>
#include <stdlib.h>
@@ -10,6 +13,7 @@ int main(int argc, char **argv) {
printf("Content of p1 is: %d\n", *p1);
// CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
// CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+ // ORIGINS: Uninitialized value was created by heap allocator padding
free(p1);
}
@@ -19,6 +23,7 @@ int main(int argc, char **argv) {
printf("Content of p2 is: %d\n", *p2);
// CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
// CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+ // ORIGINS: Uninitialized value was created by heap allocator padding
free(p2);
}
@@ -28,6 +33,7 @@ int main(int argc, char **argv) {
printf("Content of p2 is: %d\n", *p3);
// CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
// CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+ // ORIGINS: Uninitialized value was created by heap allocator padding
free(p3);
}
|
compiler-rt/lib/msan/msan_flags.inc
Outdated
MSAN_FLAG(int, origin_history_per_stack_limit, 20000, "") | ||
MSAN_FLAG(bool, poison_heap_with_zeroes, false, "") | ||
MSAN_FLAG(bool, poison_stack_with_zeroes, false, "") | ||
MSAN_FLAG(bool, poison_in_calloc, true, "") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we really need a separate flag for this? we don't have poison_in_posix_memalign
either.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
posix_memalign shares the (misleadingly-named) poison_in_malloc
flag.
It would deepen the confusion if calloc shared the poison_in_malloc
flag.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
everything shares this flag (malloc, realloc, reallocarray, pvalloc, valloc, aligned_alloc, memalign, posix_memalign), which means it means "poison in alloc" essentially. IMO adding a new flag adds more confusion, not less.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
which means it means "poison in alloc" essentially
But that's not what the flag is named. If the flag was named poison_in_alloc
, there would be a stronger case to reuse it for calloc
.
Besides, there's a difference in operation:
poison_in_malloc
poisons the entire requested allocation plus the paddingpoison_in_calloc
only poisons the padding
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's what the flag does though; it doesn't matter what it is called. If we started adding separate flags for all the other operations, and didn't respect the poison_in_malloc
flag for them anymore, that would break people.
Leaving to @vitalybuka to tie-break here. If we are doing this, call it something else. poison_in_malloc
+ poison_in_calloc
implies there are poison_in_X
, which there aren't; and there probably never will be.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Besides, there's a difference in operation:
(If you called it poison_padding
and respected that flag both in malloc
and calloc
, I'd have no problem)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(If you called it poison_padding and respected that flag both in malloc and calloc, I'd have no problem)
I'll do that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
compiler-rt/lib/msan/msan_flags.inc
Outdated
MSAN_FLAG(int, origin_history_per_stack_limit, 20000, "") | ||
MSAN_FLAG(bool, poison_heap_with_zeroes, false, "") | ||
MSAN_FLAG(bool, poison_stack_with_zeroes, false, "") | ||
MSAN_FLAG(bool, poison_in_padding, true, "") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why do we need another flag?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's not add one if we don't have really strong use-case
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It controls the new, backwards-incompatible behavior
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We do that only if we expect large cleanups. It's not the case here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you want the padding to be poisoned unconditionally, or is there an interaction with existing flags (e.g., poison_in_malloc)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just rely on poison_in_malloc
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
|
||
Metadata *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(p)); | ||
uptr size = meta->requested_size; | ||
uptr size = allocator.GetActuallyAllocatedSize(p); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you need to bump it only in line 280 brahcn
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
// bytes 0-6: initialized, origin not set (and irrelevant) | ||
// byte 7: uninitialized, origin TAG_ALLOC_PADDING (unlike malloc) | ||
// bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING | ||
if (__msan_get_track_origins() && flags()->poison_in_malloc) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would recommend to move it next to corresponding __msan_poison
Now reader needs to guess whare it coming from
also you probably will save on on if __msan_get_track_origins, which probably negligle
but Origin::CreateHeapOrigin(stack); is expensive
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, because of difference of tag you need two CreateHeapOrigin.
But they are hash table lookup each, it's likely the most expensive part of the change :(
maybe only for __msan_get_track_origins() > 1,
__msan_get_track_origins() == 1 just keep TAG_ALLOC for entire thing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done: 69bf311
compiler-rt/lib/msan/msan_report.cpp
Outdated
break; | ||
case STACK_TRACE_TAG_ALLOC_PADDING: | ||
Printf( | ||
" %sUninitialized value was created by heap allocator padding%s\n", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
something: Uninitialized value is outside of heap allocation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done: 7b98f59
✅ With the latest revision this PR passed the C/C++ code formatter. |
This is follow-up work per discussion in #155944 (comment).
If the allocator reserves more space than the user requested (e.g.,
malloc(7)
andcalloc(7,1)
actually have 16 bytes reserved), the padding bytes will now be marked as uninitialized.Padding poisoning is controlled by the existing flag
poison_in_malloc
(which applies to all allocation functions, not only malloc).Origin tag:
calloc
or with track-origins > 1, the origin will be set as a new tag,ALLOC_PADDING
ALLOC
tag will be used.ALLOC
will take precedence.