From 9acb94cdbc364205c96d66538f3ad06f3747aa54 Mon Sep 17 00:00:00 2001 From: Serge Petrenko Date: Mon, 18 Nov 2019 16:40:36 +0300 Subject: [PATCH] fiber.top() refactor clock and cpu time calculation Unify all the members related to fiber's clock statistics into struct clock_stat and all the members related to cord's knowledge of cpu state and clocks to struct cpu_stat. Reset stats of all alive fibers on fiber.top_enable(). Follow-up #2694 --- src/lib/core/fiber.c | 179 ++++++++++++++++++++++++++----------------- src/lib/core/fiber.h | 109 ++++++++++++++++++-------- src/lua/fiber.c | 20 +++-- 3 files changed, 199 insertions(+), 109 deletions(-) diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c index aebaba7f04f7..1e08d0ec92b1 100644 --- a/src/lib/core/fiber.c +++ b/src/lib/core/fiber.c @@ -88,6 +88,9 @@ static int (*fiber_invoke)(fiber_func f, va_list ap); #if ENABLE_FIBER_TOP static __thread bool fiber_top_enabled = false; + +uint64_t +cpu_stat_on_csw(struct cpu_stat *stat); #endif /* ENABLE_FIBER_TOP */ /** @@ -103,18 +106,10 @@ clock_set_on_csw(struct fiber *caller) if (!fiber_top_enabled) return; - uint64_t clock; - uint32_t cpu_id; - clock = __rdtscp(&cpu_id); + uint64_t delta = cpu_stat_on_csw(&cord()->cpu_stat); - if (cpu_id == cord()->cpu_id_last) { - caller->clock_delta += clock - cord()->clock_last; - cord()->clock_delta += clock - cord()->clock_last; - } else { - cord()->cpu_id_last = cpu_id; - cord()->cpu_miss_count++; - } - cord()->clock_last = clock; + clock_stat_add_delta(&cord()->clock_stat, delta); + clock_stat_add_delta(&caller->clock_stat, delta); #endif /* ENABLE_FIBER_TOP */ } @@ -695,9 +690,7 @@ fiber_reset(struct fiber *fiber) rlist_create(&fiber->on_stop); fiber->flags = FIBER_DEFAULT_FLAGS; #if ENABLE_FIBER_TOP - fiber->cputime = 0; - fiber->clock_acc = 0; - fiber->clock_delta = 0; + clock_stat_reset(&fiber->clock_stat); #endif /* ENABLE_FIBER_TOP */ } @@ -1095,9 +1088,52 @@ loop_on_iteration_start(ev_loop *loop, ev_check *watcher, int revents) (void) watcher; (void) revents; - cord()->clock_last = __rdtscp(&cord()->cpu_id_last); - cord()->cpu_miss_count = 0; + cpu_stat_start(&cord()->cpu_stat); +} + +/** + * Calculate the exponential moving average for the clock deltas + * per loop iteration. The coeffitient is 1/16. + */ +static inline uint64_t +clock_diff_accumulate(uint64_t acc, uint64_t delta) +{ + if (acc > 0) { + return delta / 16 + 15 * acc / 16; + } else { + return delta; + } +} +inline void +clock_stat_add_delta(struct clock_stat *stat, uint64_t clock_delta) +{ + stat->delta += clock_delta; +} + +void +clock_stat_update(struct clock_stat *stat, double nsec_per_clock) +{ + stat->acc = clock_diff_accumulate(stat->acc, stat->delta); + stat->prev_delta = stat->delta; + stat->cputime += stat->delta * nsec_per_clock; + stat->delta = 0; +} + +void +clock_stat_reset(struct clock_stat *stat) +{ + stat->acc = 0; + stat->delta = 0; + stat->prev_delta = 0; + stat->cputime = 0; +} + +void +cpu_stat_start(struct cpu_stat *stat) +{ + stat->prev_clock = __rdtscp(&stat->prev_cpu_id); + stat->cpu_miss_count = 0; /* * We want to measure thread cpu time here to calculate * each fiber's cpu time, so don't use libev's ev_now() or @@ -1106,27 +1142,59 @@ loop_on_iteration_start(ev_loop *loop, ev_check *watcher, int revents) */ struct timespec ts; if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) { - say_debug("clock_gettime(): failed to get this" - "thread's cpu time."); + say_debug("clock_gettime(): failed to get this thread's" + " cpu time."); return; } - cord()->cputime_last = (uint64_t) ts.tv_sec * FIBER_TIME_RES + - ts.tv_nsec; + stat->prev_cputime = (uint64_t) ts.tv_sec * FIBER_TIME_RES + ts.tv_nsec; } +void +cpu_stat_reset(struct cpu_stat *stat) +{ + stat->prev_cpu_miss_count = 0; + cpu_stat_start(stat); +} -/** - * Calculate the exponential moving average for the clock deltas - * per loop iteration. The coeffitient is 1/16. - */ -static inline uint64_t -clock_diff_accumulate(uint64_t acc, uint64_t delta) +uint64_t +cpu_stat_on_csw(struct cpu_stat *stat) { - if (acc > 0) { - return delta / 16 + 15 * acc / 16; + uint32_t cpu_id; + uint64_t delta = 0; + uint64_t clock = __rdtscp(&cpu_id); + + if (cpu_id == stat->prev_cpu_id) { + delta = clock - stat->prev_clock; } else { - return delta; + stat->prev_cpu_id = cpu_id; + stat->cpu_miss_count++; + } + stat->prev_clock = clock; + + return delta; +} + +double +cpu_stat_end(struct cpu_stat *stat, struct clock_stat *cord_clock_stat) +{ + stat->prev_cpu_miss_count = stat->cpu_miss_count; + stat->cpu_miss_count = 0; + + struct timespec ts; + uint64_t delta_time; + double nsec_per_clock = 0; + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) { + say_debug("clock_gettime(): failed to get this thread's" + " cpu time."); + } else { + delta_time = (uint64_t) ts.tv_sec * FIBER_TIME_RES + + ts.tv_nsec; + if (delta_time > stat->prev_cputime && cord_clock_stat->delta > 0) { + delta_time -= stat->prev_cputime; + nsec_per_clock = (double) delta_time / cord()->clock_stat.delta; + } } + return nsec_per_clock; } static void @@ -1145,40 +1213,14 @@ loop_on_iteration_end(ev_loop *loop, ev_prepare *watcher, int revents) */ clock_set_on_csw(&cord()->sched); - cord()->cpu_miss_count_last = cord()->cpu_miss_count; - cord()->cpu_miss_count = 0; + double nsec_per_clock = cpu_stat_end(&cord()->cpu_stat, + &cord()->clock_stat); - struct timespec ts; - uint64_t delta_time; - double nsec_per_clock = 0; - - if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) { - say_debug("clock_gettime(): failed to get this" - "thread's cpu time."); - } else { - delta_time = (uint64_t) ts.tv_sec * FIBER_TIME_RES + - ts.tv_nsec; - assert(delta_time > cord()->cputime_last); - delta_time -= cord()->cputime_last; - - if (cord()->clock_delta > 0) - nsec_per_clock = (double) delta_time / cord()->clock_delta; - } - - cord()->clock_acc = clock_diff_accumulate(cord()->clock_acc, cord()->clock_delta); - cord()->clock_delta_last = cord()->clock_delta; - cord()->clock_delta = 0; - - cord()->sched.clock_acc = clock_diff_accumulate(cord()->sched.clock_acc, cord()->sched.clock_delta); - cord()->sched.clock_delta_last = cord()->sched.clock_delta; - cord()->sched.cputime += cord()->sched.clock_delta * nsec_per_clock; - cord()->sched.clock_delta = 0; + clock_stat_update(&cord()->clock_stat, nsec_per_clock); + clock_stat_update(&cord()->sched.clock_stat, nsec_per_clock); rlist_foreach_entry(fiber, &cord()->alive, link) { - fiber->clock_acc = clock_diff_accumulate(fiber->clock_acc, fiber->clock_delta); - fiber->clock_delta_last = fiber->clock_delta; - fiber->cputime += fiber->clock_delta * nsec_per_clock; - fiber->clock_delta = 0; + clock_stat_update(&fiber->clock_stat, nsec_per_clock); } } @@ -1203,17 +1245,14 @@ fiber_top_enable() ev_check_start(cord()->loop, &cord()->check_event); fiber_top_enabled = true; - cord()->clock_acc = 0; - cord()->cpu_miss_count_last = 0; - cord()->clock_delta_last = 0; - struct timespec ts; - if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) { - say_debug("clock_gettime(): failed to get this" - "thread's cpu time."); - return; + cpu_stat_reset(&cord()->cpu_stat); + clock_stat_reset(&cord()->clock_stat); + clock_stat_reset(&cord()->sched.clock_stat); + + struct fiber *fiber; + rlist_foreach_entry(fiber, &cord()->alive, link) { + clock_stat_reset(&fiber->clock_stat); } - cord()->cputime_last = (uint64_t) ts.tv_sec * FIBER_TIME_RES + - ts.tv_nsec; } } diff --git a/src/lib/core/fiber.h b/src/lib/core/fiber.h index ab313a0504da..06ce28bb151c 100644 --- a/src/lib/core/fiber.h +++ b/src/lib/core/fiber.h @@ -56,14 +56,82 @@ #define ENABLE_FIBER_TOP 1 #endif +#if defined(__cplusplus) +extern "C" { +#endif /* defined(__cplusplus) */ + #if ENABLE_FIBER_TOP /* A fiber reports used up CPU time with nanosecond resolution. */ #define FIBER_TIME_RES 1000000000 -#endif /* ENABLE_FIBER_TOP */ -#if defined(__cplusplus) -extern "C" { -#endif /* defined(__cplusplus) */ +/** + * A struct containing all the info gathered for current fiber or + * thread as a whole when fiber.top() is enabled. + */ +struct clock_stat { + /** + * Accumulated clock value calculated using exponential + * moving average. + */ + uint64_t acc; + /** + * Clock delta counter used on current event loop + * iteration. + */ + uint64_t delta; + /** + * Clock delta calculated on previous event loop + * iteration. + */ + uint64_t prev_delta; + /** + * Total processor time this fiber (or cord as a whole) + * has spent with 1 / FIBER_TIME_RES second precision. + */ + uint64_t cputime; +}; + +void +clock_stat_add_delta(struct clock_stat *stat, uint64_t clock_delta); + +void +clock_stat_update(struct clock_stat *stat, double nsec_per_clock); + +void +clock_stat_reset(struct clock_stat *stat); + +/** + * A struct encapsulating all knowledge this cord has about cpu + * clocks and their state. + */ +struct cpu_stat { + uint64_t prev_clock; + /** + * This thread's CPU time at the beginning of event loop + * iteration. Used to calculate how much cpu time has + * each loop iteration consumed and update fiber cpu + * times propotionally. The resolution is + * 1 / FIBER_TIME_RES seconds. + */ + uint64_t prev_cputime; + uint32_t prev_cpu_id; + uint32_t cpu_miss_count; + uint32_t prev_cpu_miss_count; +}; + +void +cpu_stat_start(struct cpu_stat *stat); + +void +cpu_stat_reset(struct cpu_stat *stat); + +uint64_t +cpu_stat_on_csw(struct cpu_stat *stat); + +double +cpu_stat_end(struct cpu_stat *stat, struct clock_stat *cord_clock_stat); + +#endif /* ENABLE_FIBER_TOP */ enum { FIBER_NAME_MAX = 32 }; @@ -402,21 +470,7 @@ struct fiber { /** Fiber flags */ uint32_t flags; #if ENABLE_FIBER_TOP - /** - * Accumulated clock value calculated using exponential - * moving average. - */ - uint64_t clock_acc; - /** - * Total processor time this fiber has spent with - * 1 / FIBER_TIME_RES second precision. - */ - uint64_t cputime; - /** - * Clock delta calculated on previous event loop iteration. - */ - uint64_t clock_delta_last; - uint64_t clock_delta; + struct clock_stat clock_stat; #endif /* ENABLE_FIBER_TOP */ /** Link in cord->alive or cord->dead list. */ struct rlist link; @@ -490,21 +544,8 @@ struct cord { */ uint32_t max_fid; #if ENABLE_FIBER_TOP - uint64_t clock_acc; - uint64_t clock_delta; - uint64_t clock_delta_last; - uint64_t clock_last; - uint32_t cpu_id_last; - uint32_t cpu_miss_count; - uint32_t cpu_miss_count_last; - /** - * This thread's CPU time at the beginning of event loop - * iteration. Used to calculate how much cpu time has - * each loop iteration consumed and update fiber cpu - * times propotionally. The resolution is - * 1 / FIBER_TIME_RES seconds. - */ - uint64_t cputime_last; + struct clock_stat clock_stat; + struct cpu_stat cpu_stat; #endif /* ENABLE_FIBER_TOP */ pthread_t id; const struct cord_on_exit *on_exit; diff --git a/src/lua/fiber.c b/src/lua/fiber.c index 8b3b22e55633..53ebec9aa5f6 100644 --- a/src/lua/fiber.c +++ b/src/lua/fiber.c @@ -278,7 +278,7 @@ lbox_fiber_statof(struct fiber *f, void *cb_ctx, bool backtrace) lua_settable(L, -3); lua_pushliteral(L, "time"); - lua_pushnumber(L, f->cputime / (double) FIBER_TIME_RES); + lua_pushnumber(L, f->clock_stat.cputime / (double) FIBER_TIME_RES); lua_settable(L, -3); lua_pushliteral(L, "memory"); @@ -335,13 +335,23 @@ lbox_fiber_top_entry(struct fiber *f, void *cb_ctx) lua_newtable(L); lua_pushliteral(L, "average"); - lua_pushnumber(L, f->clock_acc / (double)cord()->clock_acc * 100); + if (cord()->clock_stat.acc != 0) { + lua_pushnumber(L, f->clock_stat.acc / + (double)cord()->clock_stat.acc * 100); + } else { + lua_pushnumber(L, 0); + } lua_settable(L, -3); lua_pushliteral(L, "instant"); - lua_pushnumber(L, f->clock_delta_last / (double)cord()->clock_delta_last * 100); + if (cord()->clock_stat.prev_delta != 0) { + lua_pushnumber(L, f->clock_stat.prev_delta / + (double)cord()->clock_stat.prev_delta * 100); + } else { + lua_pushnumber(L, 0); + } lua_settable(L, -3); lua_pushliteral(L, "time"); - lua_pushnumber(L, f->cputime / (double) FIBER_TIME_RES); + lua_pushnumber(L, f->clock_stat.cputime / (double) FIBER_TIME_RES); lua_settable(L, -3); lua_settable(L, -3); @@ -357,7 +367,7 @@ lbox_fiber_top(struct lua_State *L) } lua_newtable(L); lua_pushliteral(L, "cpu_misses"); - lua_pushnumber(L, cord()->cpu_miss_count_last); + lua_pushnumber(L, cord()->cpu_stat.prev_cpu_miss_count); lua_settable(L, -3); lua_pushliteral(L, "cpu");