diff --git a/src/iris_common.h b/src/iris_common.h index a8d8622..466a869 100644 --- a/src/iris_common.h +++ b/src/iris_common.h @@ -54,6 +54,22 @@ SOFTWARE. #define IRIS_DEFAULT_PAGE_SIZE (IRIS_DEFAULT_BLOCK_SIZE * 64) #endif +#ifndef IRIS_PROFILE_THREAD +#define IRIS_PROFILE_THREAD(name, i) +#endif + +#ifndef IRIS_PROFILE_SCOPE +#define IRIS_PROFILE_SCOPE(name) +#endif + +#ifndef IRIS_PROFILE_PUSH +#define IRIS_PROFILE_PUSH(name) +#endif + +#ifndef IRIS_PROFILE_POP +#define IRIS_PROFILE_POP() +#endif + namespace iris { static constexpr size_t default_block_size = IRIS_DEFAULT_BLOCK_SIZE; static constexpr size_t default_page_size = IRIS_DEFAULT_PAGE_SIZE; diff --git a/src/iris_dispatcher.h b/src/iris_dispatcher.h index 2d01c59..a03e3a9 100644 --- a/src/iris_dispatcher.h +++ b/src/iris_dispatcher.h @@ -417,6 +417,8 @@ namespace iris { // cleanup the dispatcher, pass true to 'execute_remaining' to make sure all tasks are executed finally. template static bool join(iterator_t begin, iterator_t end) { + IRIS_PROFILE_SCOPE(__FUNCTION__); + if /* constexpr */ (!finalize) { // suspend all warps so we can take over tasks for (iterator_t p = begin; p != end; ++p) { @@ -484,6 +486,7 @@ namespace iris { template typename std::enable_if::type execute_internal() noexcept( noexcept(std::declval().flush()) && noexcept(std::declval()())) { + IRIS_PROFILE_SCOPE(__FUNCTION__); // mark for queueing, avoiding flush me more than once. queueing.store(queue_state_executing, std::memory_order_release); iris_warp_t** warp_ptr = &get_current_warp_internal(); @@ -510,6 +513,7 @@ namespace iris { template typename std::enable_if::type execute_internal() noexcept( noexcept(std::declval().flush()) && noexcept(std::declval()())) { + IRIS_PROFILE_SCOPE(__FUNCTION__); // mark for queueing, avoiding flush me more than once. queueing.store(queue_state_executing, std::memory_order_release); iris_warp_t** warp_ptr = &get_current_warp_internal(); @@ -538,6 +542,7 @@ namespace iris { template void execute() noexcept(noexcept(std::declval().template execute_internal())) { + IRIS_PROFILE_SCOPE(__FUNCTION__); if (suspend_count.load(std::memory_order_acquire) == 0) { // try to acquire execution, if it fails, there must be another thread doing the same thing // and it's ok to return immediately. @@ -740,6 +745,7 @@ namespace iris { } bool cleanup() noexcept { + IRIS_PROFILE_SCOPE(__FUNCTION__); routine_t* p = resurrect_routines.exchange(nullptr, std::memory_order_acquire); if (p != nullptr) { while (p != nullptr) { @@ -757,6 +763,7 @@ namespace iris { } bool resurrect() { + IRIS_PROFILE_SCOPE(__FUNCTION__); routine_t* p = resurrect_routines.exchange(nullptr, std::memory_order_acquire); if (p != nullptr) { while (p != nullptr) { @@ -805,6 +812,7 @@ namespace iris { }; void complete(bool success) { + IRIS_PROFILE_SCOPE(__FUNCTION__); // all pending routines finished? if (pending_count.fetch_sub(1, std::memory_order_release) == 1) { // if completion throws exception, we still do not care about pending_count anyway @@ -815,6 +823,7 @@ namespace iris { } void execute(routine_t* routine) { + IRIS_PROFILE_SCOPE(__FUNCTION__); assert(routine->lock_count.load(std::memory_order_relaxed) == 0); do { routine_guard_t guard(*this, routine, &resurrect_routines); @@ -926,6 +935,7 @@ namespace iris { for (size_t i = 0; i < internal_thread_count; i++) { threads[i] = thread_t([this, i]() { + IRIS_PROFILE_THREAD("iris_async_worker", i); thread_loop(i); }); } @@ -1129,6 +1139,7 @@ namespace iris { // wait for all threads in worker to be finished. void join() { + IRIS_PROFILE_SCOPE(__FUNCTION__); if (!task_heads.empty()) { for (size_t i = 0; i < threads.size(); i++) { if (threads[i].joinable()) { @@ -1186,6 +1197,7 @@ namespace iris { protected: // cleanup all pending tasks bool cleanup() { + IRIS_PROFILE_SCOPE(__FUNCTION__); bool empty = true; for (size_t i = 0; i < task_heads.size(); i++) { @@ -1222,6 +1234,7 @@ namespace iris { // poll with given priority bool poll_internal(size_t priority_size) { + IRIS_PROFILE_SCOPE(__FUNCTION__); std::pair slot = fetch(priority_size); size_t index = slot.first; diff --git a/src/iris_lua.h b/src/iris_lua.h index d89867a..e423663 100644 --- a/src/iris_lua.h +++ b/src/iris_lua.h @@ -84,6 +84,7 @@ namespace iris { // run a piece of code template return_t run(std::string_view code) { + IRIS_PROFILE_SCOPE(__FUNCTION__); auto guard = write_fence(); lua_State* L = state; stack_guard_t stack_guard(L); @@ -415,6 +416,7 @@ namespace iris { // call function in protect mode template return_t call(callable_t&& reference, args_t&&... args) { + IRIS_PROFILE_SCOPE(__FUNCTION__); auto guard = write_fence(); lua_State* L = state; diff --git a/src/iris_system.h b/src/iris_system.h index 292ad98..458b1d6 100644 --- a/src/iris_system.h +++ b/src/iris_system.h @@ -198,6 +198,7 @@ namespace iris { // iterate components template void for_each(operation_t&& op) noexcept(noexcept(std::declval>().for_each(op))) { + IRIS_PROFILE_SCOPE(__FUNCTION__); auto guard = read_fence(); std::get::value>(components).for_each(op); } @@ -205,6 +206,7 @@ namespace iris { // n is the expected group size template > void for_each_parallel(operand_t&& op, size_t n = queue_list_t::element_count) { + IRIS_PROFILE_SCOPE(__FUNCTION__); auto guard = read_fence(); auto& target_components = std::get::value>(components); @@ -481,11 +483,13 @@ namespace iris { template void for_each(operation_t&& op) { + IRIS_PROFILE_SCOPE(__FUNCTION__); for_each_impl(std::forward(op), std::integral_constant()); } template void for_each_system(operation_t&& op) { + IRIS_PROFILE_SCOPE(__FUNCTION__); for_each_system_impl(std::forward(op), gen_seq()); } @@ -634,11 +638,13 @@ namespace iris { template void for_each(operation_t&& op) { + IRIS_PROFILE_SCOPE(__FUNCTION__); for_each_impl(std::forward(op), std::integral_constant()); } template void for_each_system(operation_t&& op) { + IRIS_PROFILE_SCOPE(__FUNCTION__); for_each_system_impl(std::forward(op), gen_seq()); }