Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions libc/src/__support/GPU/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,9 @@ struct Slab {

// Get the number of bytes needed to contain the bitfield bits.
constexpr static uint32_t bitfield_bytes(uint32_t chunk_size) {
return ((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8;
return __builtin_align_up(
((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8,
MIN_ALIGNMENT + 1);
}

// The actual amount of memory available excluding the bitfield and metadata.
Expand Down Expand Up @@ -584,7 +586,7 @@ void *aligned_allocate(uint32_t alignment, uint64_t size) {

// If the requested alignment is less than what we already provide this is
// just a normal allocation.
if (alignment < MIN_ALIGNMENT + 1)
if (alignment <= MIN_ALIGNMENT + 1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why this becomes <= now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Workaround for this exact bug because I knew that the true minimum alignment was 8 and now it's 16 correctly.

return gpu::allocate(size);

// We can't handle alignments greater than 2MiB so we simply fail.
Expand All @@ -594,7 +596,7 @@ void *aligned_allocate(uint32_t alignment, uint64_t size) {
// Trying to handle allocation internally would break the assumption that each
// chunk is identical to eachother. Allocate enough memory with worst-case
// alignment and then round up. The index logic will round down properly.
uint64_t rounded = size + alignment - 1;
uint64_t rounded = size + alignment - MIN_ALIGNMENT;
void *ptr = gpu::allocate(rounded);
return __builtin_align_up(ptr, alignment);
}
Expand Down
4 changes: 2 additions & 2 deletions libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ TEST_MAIN(int, char **, char **) {
// aligned_alloc with valid alignment and size
void *ptr = LIBC_NAMESPACE::aligned_alloc(32, 16);
EXPECT_NE(ptr, nullptr);
EXPECT_EQ(__builtin_is_aligned(ptr, 32), 0U);
EXPECT_TRUE(__builtin_is_aligned(ptr, 32));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it is not aligned to alignment before even we call aligned_alloc?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

aligned_alloc here should return an aligned pointer, it's just checking if that's true.


LIBC_NAMESPACE::free(ptr);

Expand All @@ -23,7 +23,7 @@ TEST_MAIN(int, char **, char **) {
void *div =
LIBC_NAMESPACE::aligned_alloc(alignment, (gpu::get_thread_id() + 1) * 4);
EXPECT_NE(div, nullptr);
EXPECT_EQ(__builtin_is_aligned(div, alignment), 0U);
EXPECT_TRUE(__builtin_is_aligned(div, alignment));

return 0;
}
1 change: 1 addition & 0 deletions libc/test/integration/src/stdlib/gpu/malloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ TEST_MAIN(int, char **, char **) {
int *divergent = reinterpret_cast<int *>(
LIBC_NAMESPACE::malloc((gpu::get_thread_id() + 1) * 16));
EXPECT_NE(divergent, nullptr);
EXPECT_TRUE(__builtin_is_aligned(divergent, 16));
*divergent = 1;
EXPECT_EQ(*divergent, 1);
LIBC_NAMESPACE::free(divergent);
Expand Down
Loading