-
Notifications
You must be signed in to change notification settings - Fork 21.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Summary: AllocationPlan: Stores the sequence of allocations, their sizes and the lifetimes of the allocations. Along with this it also stores the total size of a single memory blob, total_size, required to satisfy all the allocations. It also stores the offsets in the blob, of size total_size, corresponding to each allocation. Thus the allocation plan contains: - allocation sizes - allocation lifetimes - allocation offsets - total size. AllocationPlanner: Takes a pointer to the allocation plan and fills it up with the plan, i.e. sizes, lifetimes, offsets, total size. This is done via WithProfileAllocationsGuard, which takes in an AllocationPlan*, constructs an AllocationPlanner* and sets the thread-local allocation_planner to it. MobileCPUAllocator profiles allocations via allocation_planner. In WithValidateAllocationPlanGuard, allocations profiled in the allocation plan are validated. CPUProfilingAllocator: The application owns the CPUProfilingAllocator. Using WithProfilingAllocatorGuard, it passes both the CPUProfilingAllocator and the AllocationPlan created earlier. Then CPUProfilingAllocator will manage allocations and frees according to the plan. Allocations that are not managed by CPUProfilingAllocator will be routed through c10::alloc_cpu, c10::free_cpu. Test Plan: cpu_profiling_allocator_test on mobile. Reviewers: Subscribers: Tasks: Tags: ghstack-source-id: 1c1b2d3d87130a57431a9455b4cb18a4935bdbd5 Pull Request resolved: #43951
- Loading branch information
1 parent
45ddeb5
commit e85c4a4
Showing
5 changed files
with
744 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
#include <gtest/gtest.h> | ||
|
||
#include <c10/mobile/CPUProfilingAllocator.h> | ||
#include <ATen/ATen.h> | ||
|
||
// Runs a small conv2d -> flatten -> linear pipeline whose allocation pattern
// depends on `cond`, so tests can profile one pattern and replay another.
//
// input, conv_weight, linear_weight: tensors fed to conv2d and linear.
// cond:     when true, `input` is first replaced by `input * 2`, producing an
//           additional intermediate tensor.
// pointers: storage for the data pointers of the (possibly scaled) input and
//           of the conv2d output.
// record:   when true, appends those two data pointers to `pointers`.
// validate: when true, checks those two data pointers against pointers[0]
//           and pointers[1]; a mismatch (or fewer than two recorded entries)
//           fails a TORCH_CHECK.
//
// Returns the output of the linear layer.
at::Tensor run_with_control_flow(
    at::Tensor input,
    at::Tensor conv_weight,
    at::Tensor linear_weight,
    bool cond,
    std::vector<void*>& pointers,
    bool record = false,
    bool validate = false) {
  if (cond) {
    input = input * 2;
  }
  void* input_ptr = input.data_ptr();
  auto conv_out = at::conv2d(input, conv_weight);
  // Track the conv2d result itself; reading input.data_ptr() here would just
  // duplicate input_ptr and leave the intermediate tensor unchecked.
  void* conv_out_ptr = conv_out.data_ptr();
  auto conv_out_flat = conv_out.view({conv_out.size(0), -1});
  auto output = at::linear(conv_out_flat, linear_weight);
  if (record) {
    pointers.push_back(input_ptr);
    pointers.push_back(conv_out_ptr);
  }
  if (validate) {
    // Guard the indexed reads below against an empty/short record.
    TORCH_CHECK(
        pointers.size() >= 2,
        "validate requested but fewer than two pointers were recorded");
    TORCH_CHECK(input_ptr == pointers[0]);
    TORCH_CHECK(conv_out_ptr == pointers[1]);
  }
  return output;
}
|
||
// Profiles an allocation plan under one control-flow condition and checks
// that replaying it validates only when the condition is unchanged.
TEST(CPUAllocationPlanTest, with_control_flow) {
  at::Tensor a = at::rand({23, 16, 16, 16});
  at::Tensor conv_weight = at::rand({16, 16, 3, 3});
  // conv2d output shape: 23, 16, 14, 14
  // flattened shape:     23, 3136
  at::Tensor linear_weight = at::rand({32, 3136});
  at::Tensor output;
  std::vector<void*> pointers;

  // Profiling a single run on its own must not throw.
  auto profile_once = [&]() {
    c10::AllocationPlan plan;
    c10::WithProfileAllocationsGuard profile_guard(&plan);
    output =
        run_with_control_flow(a, conv_weight, linear_weight, true, pointers);
  };
  ASSERT_NO_THROW(profile_once());

  // Profiles with `profile_cond`, then replays ten times with `replay_cond`
  // under the validation guard. Returns true only if every replay validated.
  auto plan_validates = [&](bool profile_cond, bool replay_cond) -> bool {
    c10::AllocationPlan plan;
    {
      c10::WithProfileAllocationsGuard profile_guard(&plan);
      output = run_with_control_flow(
          a, conv_weight, linear_weight, profile_cond, pointers);
    }
    bool all_ok = true;
    for (int iter = 0; iter < 10; ++iter) {
      bool iter_ok;
      {
        c10::WithValidateAllocationPlanGuard validation_guard(&plan, &iter_ok);
        output = run_with_control_flow(
            a, conv_weight, linear_weight, replay_cond, pointers);
      }
      all_ok = all_ok && iter_ok;
    }
    return all_ok;
  };

  // Mismatched conditions must fail validation; matching ones must pass.
  ASSERT_FALSE(plan_validates(false, true));
  ASSERT_FALSE(plan_validates(true, false));
  ASSERT_TRUE(plan_validates(true, true));
  ASSERT_TRUE(plan_validates(false, false));
}
|
||
// Profiles an allocation plan and then replays the same computation with a
// CPUProfilingAllocator driven by that plan, optionally checking that the
// tracked data pointers are identical across replays.
TEST(CPUAllocationPlanTest, with_profiling_alloc) {
  at::Tensor a = at::rand({23, 16, 16, 16});
  at::Tensor conv_weight = at::rand({16, 16, 3, 3});
  // conv2d output shape: 23, 16, 14, 14
  // flattened shape:     23, 3136
  at::Tensor linear_weight = at::rand({32, 3136});
  at::Tensor output;
  std::vector<void*> pointers;

  // Profiling a single run on its own must not throw.
  auto profile_once = [&]() {
    c10::AllocationPlan plan;
    c10::WithProfileAllocationsGuard profile_guard(&plan);
    output =
        run_with_control_flow(a, conv_weight, linear_weight, false, pointers);
  };
  ASSERT_NO_THROW(profile_once());

  // Profiles with `profile_cond`, then runs with `replay_cond` under the
  // profiling allocator: once up front (recording pointers when
  // `check_pointers` is set) and ten more times (validating them when
  // `check_pointers` is set).
  auto replay_with_plan =
      [&](bool profile_cond, bool replay_cond, bool check_pointers) {
        pointers.clear();
        c10::AllocationPlan plan;
        {
          c10::WithProfileAllocationsGuard profile_guard(&plan);
          // record/validate are left at their defaults (both false).
          output = run_with_control_flow(
              a, conv_weight, linear_weight, profile_cond, pointers);
        }

        c10::CPUProfilingAllocator profiling_allocator;
        {
          c10::WithProfilingAllocatorGuard profiling_allocator_guard(
              &profiling_allocator, &plan);
          // First replay: record only.
          output = run_with_control_flow(
              a,
              conv_weight,
              linear_weight,
              replay_cond,
              pointers,
              /*record=*/check_pointers,
              /*validate=*/false);
        }

        for (int iter = 0; iter < 10; ++iter) {
          c10::WithProfilingAllocatorGuard profiling_allocator_guard(
              &profiling_allocator, &plan);
          // Subsequent replays: validate only.
          output = run_with_control_flow(
              a,
              conv_weight,
              linear_weight,
              replay_cond,
              pointers,
              /*record=*/false,
              /*validate=*/check_pointers);
        }
      };

  // When control flow conditions are same between profiling and evaluation
  // profiling allocator should not throw.
  ASSERT_NO_THROW(replay_with_plan(true, true, false));
  ASSERT_NO_THROW(replay_with_plan(false, false, false));
  // Furthermore profiling allocator should return the same pointers
  // back for the intermediate tensors
  ASSERT_NO_THROW(replay_with_plan(true, true, true));
  ASSERT_NO_THROW(replay_with_plan(false, false, true));

  // When control flow conditions are different between profiling and
  // evaluation profiling allocator should throw.
  ASSERT_THROW(replay_with_plan(true, false, false), c10::Error);
  ASSERT_THROW(replay_with_plan(false, true, false), c10::Error);
}
|
||
// Test entry point. The tests only run when built for mobile; otherwise the
// binary exits successfully without running anything.
int main(int argc, char* argv[]) {
// At the moment caching allocator is only exposed to mobile cpu allocator.
#ifdef C10_MOBILE
  ::testing::InitGoogleTest(&argc, argv);
  at::manual_seed(42);
  const int status = RUN_ALL_TESTS();
  return status;
#else
  (void)argc;
  (void)argv;
  return 0;
#endif // C10_MOBILE
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.