From 165f334478ffa1923304f909511fc4fdba69175e Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Thu, 27 Jul 2023 14:35:18 +0930 Subject: [PATCH 1/7] config: Detect presence of USDT header in configure --- configure | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/configure b/configure index 8a5b00c1f43f..807d4ab466d3 100755 --- a/configure +++ b/configure @@ -413,6 +413,18 @@ int main(void) return 0; } /*END*/ +var=HAVE_USDT +desc=User Statically-Defined Tracing (USDT) +style=DEFINES_EVERYTHING|EXECUTE|MAY_NOT_COMPILE +code= +#include + +int main(void) +{ + DTRACE_PROBE(lightningd, test_sdt); + return 0; +} +/*END*/ var=HAVE_GCC desc=compiler is GCC style=OUTSIDE_MAIN From 4e8b940f12aa30b083a0de7fa3044f79e4318c29 Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Thu, 27 Jul 2023 14:43:53 +0930 Subject: [PATCH 2/7] trace: First draft of API --- common/Makefile | 1 + common/trace.c | 200 ++++++++++++++++++++++++++++++++++ common/trace.h | 11 ++ lightningd/Makefile | 1 + lightningd/jsonrpc.c | 4 + lightningd/test/run-jsonrpc.c | 9 ++ 6 files changed, 226 insertions(+) create mode 100644 common/trace.c create mode 100644 common/trace.h diff --git a/common/Makefile b/common/Makefile index 1944a5f15936..fbefb5eb0831 100644 --- a/common/Makefile +++ b/common/Makefile @@ -88,6 +88,7 @@ COMMON_SRC_NOGEN := \ common/status_wire.c \ common/subdaemon.c \ common/timeout.c \ + common/trace.c \ common/tx_roles.c \ common/type_to_string.c \ common/utils.c \ diff --git a/common/trace.c b/common/trace.c new file mode 100644 index 000000000000..ad3fa0c9953c --- /dev/null +++ b/common/trace.c @@ -0,0 +1,200 @@ +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if HAVE_USDT +#define MAX_ACTIVE_SPANS 128 +#define SPAN_ID_LEN 33 + +const char *trace_service_name = "lightningd"; + +struct span_tag { + char *name, *value; +}; + +struct span { + size_t key; + u64 id; + struct span *parent; + u64 start_time; + u64 end_time; + struct span_tag *tags; + char *name; +}; + +/* All traces we emit are prefixed with this constant. */ +static u64 trace_prefix = 0; +static struct span *active_spans; +static struct span *current; +static size_t last_span_id; + +static void trace_span_id_serialize(struct span *s, char *dest) +{ + hex_encode(&trace_prefix, 8, dest, 17); + hex_encode(&s->id, 8, 16 + dest, 17); +} + +static void trace_init(void) { + randombytes_buf(&trace_prefix, sizeof(u64)); + active_spans = malloc(sizeof(struct span) * MAX_ACTIVE_SPANS); + last_span_id = 1; + current = NULL; +} + +/** + * Convert the pointer to a context object to a numeric key. + */ +static size_t trace_key(const void *key) +{ + return (size_t)key; +} + +static struct span *trace_span_find(size_t key) +{ + for (size_t i = 0; i < MAX_ACTIVE_SPANS; i++) + if (active_spans[i].key == key) + return &active_spans[i]; + + return NULL; +} + +/** + * Find an empty slot for a new span. + */ +static struct span *trace_span_slot(void) +{ + struct span *s = trace_span_find(0); + + /* Might end up here if we have more than MAX_ACTIVE_SPANS + * concurrent spans. */ + assert(s); + assert(s->parent == NULL); + assert(s != current); + return s; +} + +static void trace_emit(struct span *s) +{ + char id[SPAN_ID_LEN]; + trace_span_id_serialize(s, id); + + char *res = tal_fmt( + NULL, + "[{\"id\": \"%s\", \"name\": \"%s\", " + "\"timestamp\": %" PRIu64 ", \"duration\": %" PRIu64 ",", + id + 16, s->name, s->start_time, s->end_time - s->start_time); + + tal_append_fmt(&res, "\"localEndpoint\": { \"serviceName\": \"%s\"}, ", + trace_service_name); + + if (s->parent != NULL) { + trace_span_id_serialize(s->parent, id); + tal_append_fmt(&res, "\"parentId\": \"%s\",", id + 16); + } + + tal_append_fmt(&res, "\"tags\": {"); + for (size_t i = 0; i < tal_count(s->tags); i++) { + tal_append_fmt(&res, "%s\"%s\": \"%s\"", i == 0 ? "" : ", ", + s->tags[i].name, s->tags[i].value); + } + + trace_span_id_serialize(s, id); + tal_append_fmt(&res, "}, \"traceId\": \"%.*s\"}]", 16, id); + fprintf(stderr, "%s\n", res); + tal_free(res); +} + +/** + * Release the span back into the pool of available spans. + */ +static void trace_span_clear(struct span *s) +{ + s->key = 0; + s->id = 0; + s->parent = NULL; + s->name = tal_free(s->name); + s->tags = tal_free(s->tags); +} + +void trace_span_start(const char *name, const void *key) +{ + if (!trace_prefix) + trace_init(); + + size_t numkey = trace_key(key); + struct timeabs now = time_now(); + + assert(trace_span_find(numkey) == NULL); + struct span *s = trace_span_slot(); + assert(current == NULL || current->id != 0); + // assert(current != s); + s->key = numkey; + s->id = last_span_id++; + s->start_time = (now.ts.tv_sec * 1000000) + now.ts.tv_nsec / 1000; + s->parent = current; + s->tags = notleak(tal_arr(NULL, struct span_tag, 0)); + s->name = notleak(tal_strdup(NULL, name)); + current = s; +} + +void trace_span_end(const void *key) +{ + size_t numkey = trace_key(key); + struct span *s = trace_span_find(numkey); + assert(s && "Span to end not found"); + assert(s == current && "Ending a span that isn't the current one"); + + struct timeabs now = time_now(); + s->end_time = (now.ts.tv_sec * 1000000) + now.ts.tv_nsec / 1000; + trace_emit(s); + + /* Reset the context span we are in. */ + current = s->parent; + + /* Now reset the span */ + trace_span_clear(s); +} + +void trace_span_tag(const void *key, const char *name, const char *value) +{ + size_t numkey = trace_key(key); + struct span *span = trace_span_find(numkey); + assert(span); + + size_t s = tal_count(span->tags); + tal_resize(&span->tags, s + 1); + span->tags[s].name = tal_strdup(span->tags, name); + span->tags[s].value = tal_strdup(span->tags, value); +} + +void trace_span_suspend(const void *key) +{ + current = NULL; + fprintf(stderr, "spanSuspend-%s (%p)\n", "???", key); +} + +void trace_span_resume(const void *key) +{ + size_t numkey = trace_key(key); + current = trace_span_find(numkey); + fprintf(stderr, "spanResume-%s (%p)\n", "???", key); +} +#else /* HAVE_USDT */ + +void trace_span_start(const char *name, const void *key) {} +void trace_span_end(const void *key) {} +void trace_span_suspend(const void *key) {} +void trace_span_resume(const void *key) {} +void trace_span_tag(const void *key, const char *name, const char *value) {} + +#endif /* HAVE_USDT */ diff --git a/common/trace.h b/common/trace.h new file mode 100644 index 000000000000..7143659c9a8c --- /dev/null +++ b/common/trace.h @@ -0,0 +1,11 @@ +#ifndef LIGHTNING_COMMON_TRACE_H +#define LIGHTNING_COMMON_TRACE_H +#include "config.h" + +void trace_span_start(const char *name, const void *key); +void trace_span_end(const void *key); +void trace_span_suspend(const void *key); +void trace_span_resume(const void *key); +void trace_span_tag(const void *key, const char *name, const char *value); + +#endif /* LIGHTNING_COMMON_TRACE_H */ diff --git a/lightningd/Makefile b/lightningd/Makefile index a12345ce9f38..b867e56ce278 100644 --- a/lightningd/Makefile +++ b/lightningd/Makefile @@ -133,6 +133,7 @@ LIGHTNINGD_COMMON_OBJS := \ common/sphinx.o \ common/status_wire.o \ common/timeout.o \ + common/trace.o \ common/tx_roles.o \ common/type_to_string.o \ common/utils.o \ diff --git a/lightningd/jsonrpc.c b/lightningd/jsonrpc.c index a0bddd7c6ae5..d3aa23b1242e 100644 --- a/lightningd/jsonrpc.c +++ b/lightningd/jsonrpc.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -978,7 +979,10 @@ parse_request(struct json_connection *jcon, const jsmntok_t tok[]) rpc_hook->custom_replace = NULL; rpc_hook->custom_buffer = NULL; + trace_span_start("lightningd/jsonrpc", &c); + trace_span_tag(&c, "method", c->json_cmd->name); completed = plugin_hook_call_rpc_command(jcon->ld, c->id, rpc_hook); + trace_span_end(&c); /* If it's deferred, mark it (otherwise, it's completed) */ if (!completed) diff --git a/lightningd/test/run-jsonrpc.c b/lightningd/test/run-jsonrpc.c index 182be1d0e285..3d86fe1a1b4c 100644 --- a/lightningd/test/run-jsonrpc.c +++ b/lightningd/test/run-jsonrpc.c @@ -137,6 +137,15 @@ void towire_channel_id(u8 **pptr UNNEEDED, const struct channel_id *channel_id U /* Generated stub for towire_node_id */ void towire_node_id(u8 **pptr UNNEEDED, const struct node_id *id UNNEEDED) { fprintf(stderr, "towire_node_id called!\n"); abort(); } +/* Generated stub for trace_span_end */ +void trace_span_end(const void *key UNNEEDED) +{ fprintf(stderr, "trace_span_end called!\n"); abort(); } +/* Generated stub for trace_span_start */ +void trace_span_start(const char *name UNNEEDED, const void *key UNNEEDED) +{ fprintf(stderr, "trace_span_start called!\n"); abort(); } +/* Generated stub for trace_span_tag */ +void trace_span_tag(const void *key UNNEEDED, const char *name UNNEEDED, const char *value UNNEEDED) +{ fprintf(stderr, "trace_span_tag called!\n"); abort(); } /* Generated stub for unilateral_feerate */ u32 unilateral_feerate(struct chain_topology *topo UNNEEDED, bool option_anchors UNNEEDED) { fprintf(stderr, "unilateral_feerate called!\n"); abort(); } From 72865d29b848d42a04ae08c7b72a574a91939008 Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Thu, 27 Jul 2023 14:43:54 +0930 Subject: [PATCH 3/7] trace: Instrument lightningd startup sequence Should tell us where we're spending time. --- lightningd/lightningd.c | 19 +++++++++++++++++++ lightningd/test/run-find_my_abspath.c | 6 ++++++ 2 files changed, 25 insertions(+) diff --git a/lightningd/lightningd.c b/lightningd/lightningd.c index 7f2527cb211f..759766d61c47 100644 --- a/lightningd/lightningd.c +++ b/lightningd/lightningd.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -924,6 +925,8 @@ int main(int argc, char *argv[]) char **orig_argv; bool try_reexec; + trace_span_start("lightningd/startup", argv); + /*~ We fork out new processes very very often; every channel gets its * own process, for example, and we have `hsmd` and `gossipd` and * the plugins as well. @@ -1023,7 +1026,9 @@ int main(int argc, char *argv[]) /*~ Initialize all the plugins we just registered, so they can * do their thing and tell us about themselves (including * options registration). */ + trace_span_start("plugins/init", ld->plugins); plugins_init(ld->plugins); + trace_span_end(ld->plugins); /*~ If the plugis are misconfigured we don't want to proceed. A * misconfiguration could for example be a plugin marked as important @@ -1047,8 +1052,10 @@ int main(int argc, char *argv[]) * but the `dev_no_version_checks` field of `ld` doesn't even exist * if DEVELOPER isn't defined, so we use IFDEV(devoption,non-devoption): */ + trace_span_start("test_subdaemons", ld); if (IFDEV(!ld->dev_no_version_checks, 1)) test_subdaemons(ld); + trace_span_end(ld); /*~ Set up the HSM daemon, which knows our node secret key, so tells * us who we are. @@ -1057,7 +1064,9 @@ int main(int argc, char *argv[]) * standard of key storage; ours is in software for now, so the name * doesn't really make sense, but we can't call it the Badly-named * Daemon Software Module. */ + trace_span_start("hsmd_init", ld); ld->bip32_base = hsm_init(ld); + trace_span_end(ld); /*~ We have bearer tokens called `runes` you can use to control access. They have * a fascinating history which I shall not go into now, but they're derived from @@ -1069,7 +1078,9 @@ int main(int argc, char *argv[]) /*~ Our "wallet" code really wraps the db, which is more than a simple * bitcoin wallet (though it's that too). It also stores channel * states, invoices, payments, blocks and bitcoin transactions. */ + trace_span_start("wallet_new", ld); ld->wallet = wallet_new(ld, ld->timers); + trace_span_end(ld); /*~ We keep a filter of scriptpubkeys we're interested in. */ ld->owned_txfilter = txfilter_new(ld); @@ -1091,7 +1102,9 @@ int main(int argc, char *argv[]) * which knows (via node_announcement messages) the public * addresses of nodes, so connectd_init hands it one end of a * socket pair, and gives us the other */ + trace_span_start("connectd_init", ld); connectd_gossipd_fd = connectd_init(ld); + trace_span_end(ld); /*~ We do every database operation within a transaction; usually this * is covered by the infrastructure (eg. opening a transaction before @@ -1109,7 +1122,9 @@ int main(int argc, char *argv[]) errx(EXITCODE_WALLET_DB_MISMATCH, "Wallet sanity check failed."); /*~ Initialize the transaction filter with our pubkeys. */ + trace_span_start("init_txfilter", ld->wallet); init_txfilter(ld->wallet, ld->bip32_base, ld->owned_txfilter); + trace_span_end(ld->wallet); /*~ Get the blockheight we are currently at, UINT32_MAX is used to signal * an uninitialized wallet and that we should start off of bitcoind's @@ -1135,7 +1150,9 @@ int main(int argc, char *argv[]) /*~ Initialize block topology. This does its own io_loop to * talk to bitcoind, so does its own db transactions. */ + trace_span_start("setup_topology", ld->topology); setup_topology(ld->topology, min_blockheight, max_blockheight); + trace_span_end(ld->topology); db_begin_transaction(ld->wallet->db); @@ -1186,6 +1203,8 @@ int main(int argc, char *argv[]) /*~ Now handle sigchld, so we can clean up appropriately. */ sigchld_conn = notleak(io_new_conn(ld, sigchld_rfd, sigchld_rfd_in, ld)); + trace_span_end(argv); + /*~ Mark ourselves live. * * Note the use of type_to_string() here: it's a typesafe formatter, diff --git a/lightningd/test/run-find_my_abspath.c b/lightningd/test/run-find_my_abspath.c index 035ae3de21ad..6f01a9e83faf 100644 --- a/lightningd/test/run-find_my_abspath.c +++ b/lightningd/test/run-find_my_abspath.c @@ -221,6 +221,12 @@ void towire_channel_id(u8 **pptr UNNEEDED, const struct channel_id *channel_id U /* Generated stub for towire_node_id */ void towire_node_id(u8 **pptr UNNEEDED, const struct node_id *id UNNEEDED) { fprintf(stderr, "towire_node_id called!\n"); abort(); } +/* Generated stub for trace_span_end */ +void trace_span_end(const void *key UNNEEDED) +{ fprintf(stderr, "trace_span_end called!\n"); abort(); } +/* Generated stub for trace_span_start */ +void trace_span_start(const char *name UNNEEDED, const void *key UNNEEDED) +{ fprintf(stderr, "trace_span_start called!\n"); abort(); } /* Generated stub for txfilter_add_derkey */ void txfilter_add_derkey(struct txfilter *filter UNNEEDED, const u8 derkey[PUBKEY_CMPR_LEN]) From 99656235385f1e9aa131aa71aa72a4545997bcca Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Thu, 27 Jul 2023 14:43:56 +0930 Subject: [PATCH 4/7] trace: Instrument the wallet startup functions --- wallet/test/Makefile | 2 +- wallet/test/run-db.c | 6 ++++++ wallet/test/run-wallet.c | 6 ++++++ wallet/wallet.c | 10 ++++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/wallet/test/Makefile b/wallet/test/Makefile index c1df830636d8..2f5952967430 100644 --- a/wallet/test/Makefile +++ b/wallet/test/Makefile @@ -21,7 +21,7 @@ WALLET_TEST_COMMON_OBJS := \ common/node_id.o \ common/onionreply.o \ common/key_derive.o \ - common/psbt_keypath.o \ + common/psbt_keypath.o \ common/pseudorand.o \ common/setup.o \ common/timeout.o \ diff --git a/wallet/test/run-db.c b/wallet/test/run-db.c index e368a7aaaafc..ad8d84eac688 100644 --- a/wallet/test/run-db.c +++ b/wallet/test/run-db.c @@ -244,6 +244,12 @@ u8 *towire_hsmd_get_output_scriptpubkey(const tal_t *ctx UNNEEDED, u64 channel_i /* Generated stub for towire_temporary_node_failure */ u8 *towire_temporary_node_failure(const tal_t *ctx UNNEEDED) { fprintf(stderr, "towire_temporary_node_failure called!\n"); abort(); } +/* Generated stub for trace_span_end */ +void trace_span_end(const void *key UNNEEDED) +{ fprintf(stderr, "trace_span_end called!\n"); abort(); } +/* Generated stub for trace_span_start */ +void trace_span_start(const char *name UNNEEDED, const void *key UNNEEDED) +{ fprintf(stderr, "trace_span_start called!\n"); abort(); } /* Generated stub for txfilter_add_scriptpubkey */ void txfilter_add_scriptpubkey(struct txfilter *filter UNNEEDED, const u8 *script TAKES UNNEEDED) { fprintf(stderr, "txfilter_add_scriptpubkey called!\n"); abort(); } diff --git a/wallet/test/run-wallet.c b/wallet/test/run-wallet.c index c06e7222c700..68693ecf936b 100644 --- a/wallet/test/run-wallet.c +++ b/wallet/test/run-wallet.c @@ -856,6 +856,12 @@ u8 *towire_warningfmt(const tal_t *ctx UNNEEDED, const struct channel_id *channel UNNEEDED, const char *fmt UNNEEDED, ...) { fprintf(stderr, "towire_warningfmt called!\n"); abort(); } +/* Generated stub for trace_span_end */ +void trace_span_end(const void *key UNNEEDED) +{ fprintf(stderr, "trace_span_end called!\n"); abort(); } +/* Generated stub for trace_span_start */ +void trace_span_start(const char *name UNNEEDED, const void *key UNNEEDED) +{ fprintf(stderr, "trace_span_start called!\n"); abort(); } /* Generated stub for try_reconnect */ void try_reconnect(const tal_t *ctx UNNEEDED, struct peer *peer UNNEEDED, diff --git a/wallet/wallet.c b/wallet/wallet.c index 5bc92fe10cc5..3ef28d9ec1c9 100644 --- a/wallet/wallet.c +++ b/wallet/wallet.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -110,12 +111,21 @@ struct wallet *wallet_new(struct lightningd *ld, struct timers *timers) wallet->log = new_logger(wallet, ld->log_book, NULL, "wallet"); wallet->keyscan_gap = 50; list_head_init(&wallet->unstored_payments); + trace_span_start("db_setup", wallet); wallet->db = db_setup(wallet, ld, ld->bip32_base); + trace_span_end(wallet); db_begin_transaction(wallet->db); + + trace_span_start("invoices_new", wallet); wallet->invoices = invoices_new(wallet, wallet, timers); + trace_span_end(wallet); + + trace_span_start("outpointfilters_init", wallet); outpointfilters_init(wallet); load_indexes(wallet->db, ld->indexes); + trace_span_end(wallet); + db_commit_transaction(wallet->db); return wallet; } From 6df5d5253a901c91ce9f60340efa287fb2f35e79 Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Thu, 27 Jul 2023 14:43:58 +0930 Subject: [PATCH 5/7] trace: Instrument topology functions --- lightningd/bitcoind.c | 6 ++++++ lightningd/chaintopology.c | 13 +++++++++++++ 2 files changed, 19 insertions(+) diff --git a/lightningd/bitcoind.c b/lightningd/bitcoind.c index e2ad1817cb35..910533362565 100644 --- a/lightningd/bitcoind.c +++ b/lightningd/bitcoind.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -464,6 +465,8 @@ getrawblockbyheight_callback(const char *buf, const jsmntok_t *toks, const char *block_str, *err; struct bitcoin_blkid blkid; struct bitcoin_block *blk; + trace_span_resume(call); + trace_span_end(call); /* If block hash is `null`, this means not found! Call the callback * with NULL values. */ @@ -514,6 +517,9 @@ void bitcoind_getrawblockbyheight_(struct bitcoind *bitcoind, call->cb = cb; call->cb_arg = cb_arg; + trace_span_start("plugin/bitcoind", call); + trace_span_tag(call, "method", "getrawblockbyheight"); + trace_span_suspend(call); req = jsonrpc_request_start(bitcoind, "getrawblockbyheight", NULL, true, bitcoind->log, NULL, getrawblockbyheight_callback, diff --git a/lightningd/chaintopology.c b/lightningd/chaintopology.c index 3c72da362dc8..49be23683d42 100644 --- a/lightningd/chaintopology.c +++ b/lightningd/chaintopology.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -971,13 +972,22 @@ static void add_tip(struct chain_topology *topo, struct block *b) b->prev = topo->tip; topo->tip->next = b; /* FIXME this doesn't seem to be used anywhere */ topo->tip = b; + trace_span_start("wallet_block_add", b); wallet_block_add(topo->ld->wallet, b); + trace_span_end(b); + trace_span_start("topo_add_utxo", b); topo_add_utxos(topo, b); + trace_span_end(b); + + trace_span_start("topo_update_spends", b); topo_update_spends(topo, b); + trace_span_end(b); /* Only keep the transactions we care about. */ + trace_span_start("filter_block_txs", b); filter_block_txs(topo, b); + trace_span_end(b); block_map_add(topo->block_map, b); topo->max_blockheight = b->height; @@ -1056,6 +1066,7 @@ static void get_new_block(struct bitcoind *bitcoind, if (!blkid && !blk) { /* No such block, we're done. */ updates_complete(topo); + trace_span_end(topo); return; } assert(blkid && blk); @@ -1075,6 +1086,7 @@ static void get_new_block(struct bitcoind *bitcoind, } /* Try for next one. */ + trace_span_end(topo); try_extend_tip(topo); } @@ -1083,6 +1095,7 @@ static void try_extend_tip(struct chain_topology *topo) topo->extend_timer = NULL; if (topo->stopping) return; + trace_span_start("extend_tip", topo); bitcoind_getrawblockbyheight(topo->bitcoind, topo->tip->height + 1, get_new_block, topo); } From 44e4b6146857524d659d2ac33a05796a2fb3dd06 Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Thu, 27 Jul 2023 14:47:22 +0930 Subject: [PATCH 6/7] trace: Use User Statically-Defined Tracepoints to emit tracing spans --- common/trace.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/common/trace.c b/common/trace.c index ad3fa0c9953c..3cbb6c2ca716 100644 --- a/common/trace.c +++ b/common/trace.c @@ -13,6 +13,8 @@ #include #if HAVE_USDT +#include + #define MAX_ACTIVE_SPANS 128 #define SPAN_ID_LEN 33 @@ -110,7 +112,7 @@ static void trace_emit(struct span *s) trace_span_id_serialize(s, id); tal_append_fmt(&res, "}, \"traceId\": \"%.*s\"}]", 16, id); - fprintf(stderr, "%s\n", res); + DTRACE_PROBE2(lightningd, span_emit, id, res); tal_free(res); } @@ -145,6 +147,7 @@ void trace_span_start(const char *name, const void *key) s->tags = notleak(tal_arr(NULL, struct span_tag, 0)); s->name = notleak(tal_strdup(NULL, name)); current = s; + DTRACE_PROBE1(lightningd, span_start, s->id); } void trace_span_end(const void *key) @@ -156,6 +159,7 @@ void trace_span_end(const void *key) struct timeabs now = time_now(); s->end_time = (now.ts.tv_sec * 1000000) + now.ts.tv_nsec / 1000; + DTRACE_PROBE1(lightningd, span_end, s->id); trace_emit(s); /* Reset the context span we are in. */ @@ -179,15 +183,17 @@ void trace_span_tag(const void *key, const char *name, const char *value) void trace_span_suspend(const void *key) { + size_t numkey = trace_key(key); + struct span *span = trace_span_find(numkey); current = NULL; - fprintf(stderr, "spanSuspend-%s (%p)\n", "???", key); + DTRACE_PROBE1(lightningd, span_suspend, span->id); } void trace_span_resume(const void *key) { size_t numkey = trace_key(key); current = trace_span_find(numkey); - fprintf(stderr, "spanResume-%s (%p)\n", "???", key); + DTRACE_PROBE1(lightningd, span_resume, current->id); } #else /* HAVE_USDT */ From 5b27344a9301c1ad05751fc3856cedda63cc02d3 Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Thu, 27 Jul 2023 14:47:24 +0930 Subject: [PATCH 7/7] doc: Add documentation for the tracing subsystem Changelog-Added: ops: A new tracing system with minimal performance impact was added allowing performance tracing in productive systems --- doc/developers-guide/tracing.md | 109 ++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 doc/developers-guide/tracing.md diff --git a/doc/developers-guide/tracing.md b/doc/developers-guide/tracing.md new file mode 100644 index 000000000000..cce7ac67f5bc --- /dev/null +++ b/doc/developers-guide/tracing.md @@ -0,0 +1,109 @@ +# Tracing CLN Performance + +CLN includes a simple opentracing exporter that allows tracing the +execution of the node in real-time, without incurring a performance +penalty when not listening for traces. Quoting the [Wikipedia][wp] +entry on Tracing: + +> In software engineering, tracing involves a specialized use of +> logging to record information about a program's execution. This +> information is typically used by programmers for debugging purposes, +> and additionally, depending on the type and detail of information +> contained in a trace log, by experienced system administrators or +> technical-support personnel and by software monitoring tools to +> diagnose common problems with software. + +The tracing system in CLN is implemented using [USDTs][usdt] (no, not +that kind of [USDT][tether]). As such it emits events into the kernel, +from where an exporter can receive them. If no exporter is configured +then the kernel will replace the call-sites of the probe with a `NOP`, +thus causing only minimal overhead when not tracing. + +## Compiling with tracing support + +CLN will build with tracing support if the necessary headers +(`sys/sdt.h`) are present during the compilation. For debian and +ubuntu based systems that is easily achieved by installing +`systemtap-sdt-dev`: + +```bash +# apt-get install -y systemtap-sdt-dev +``` +- +Don't forget to run `./configure` and `make` to recompile after +installing the dependencies. `config.vars` should contain the +following line after running `./configure`: + +``` +HAVE_USDT=1 +``` + +If you have a compiled binary you can verify whether it was compiled +with USDT support with the following command: + +```bash +$ readelf -S lightningd/lightningd | grep -i sdt +``` + +Alternatively you can list the tracepoints in the binary with the +following: + +```bash +$ sudo bpftrace -l 'U:lightningd/lightningd:*' +usdt:lightningd/lightningd:lightningd:span_emit +usdt:lightningd/lightningd:lightningd:span_end +usdt:lightningd/lightningd:lightningd:span_resume +usdt:lightningd/lightningd:lightningd:span_start +usdt:lightningd/lightningd:lightningd:span_suspend +``` + +## Exporters + +The simplest way to get started with eBPF in general (which the +tracing is built upon) is the `bpftrace` command that we've already +seen above when checking if the binary was built with tracing support. + +```bash +$ sudo bpftrace -l 'U:lightningd/lightningd:*' +usdt:lightningd/lightningd:lightningd:span_emit +usdt:lightningd/lightningd:lightningd:span_end +usdt:lightningd/lightningd:lightningd:span_resume +usdt:lightningd/lightningd:lightningd:span_start +usdt:lightningd/lightningd:lightningd:span_suspend +``` + +We want to attach to the `span_emit` probe, as that's the one getting +the opentracing-compatible JSON string passed as an argument, and we'd +like to extract that. + +```bash +$ export BPFTRACE_STRLEN=200 +$ sudo -E bpftrace -e 'U:../lightning/lightningd/lightningd:span_emit {printf("%s\n", str(arg1, 10240));}' + +Attaching 1 probe... +[{"id": "5006000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283751653, "duration": 154,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "sql"}, "traceId": "b7f9b1 +[{"id": "5106000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283752515, "duration": 146,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "listnodes"}, "traceId": " +[{"id": "8206000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283759889, "duration": 657,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-memleak"}, "traceId": +[{"id": "8306000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283784095, "duration": 135,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-report-fds"}, "traceI +[{"id": "8406000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283785116, "duration": 204,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "stop"}, "traceId": "f6d42 +[{"id": "3100000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283860017, "duration": 770,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-memleak"}, "traceId": +[{"id": "3200000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283992271, "duration": 155,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-report-fds"}, "traceI +[{"id": "3300000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202283993745, "duration": 370,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "stop"}, "traceId": "92576 +[{"id": "5206000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202284070125, "duration": 911,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-memleak"}, "traceId": +[{"id": "5506000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202284313462, "duration": 62,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "dev-report-fds"}, "traceId +[{"id": "5606000000000000", "name": "lightningd/jsonrpc", "timestamp": 1690202284314014, "duration": 117,"localEndpoint": { "serviceName": "lightningd"}, "tags": {"method": "stop"}, "traceId": "b7f9b +[{"id": "5406000000000000", "name": "plugin/bitcoind", "timestamp": 1690202284312469, "duration": 4774,"localEndpoint": { "serviceName": "lightningd"}, "parentId": "5306000000000000","tags": {"method +[{"id": "5306000000000000", "name": "extend_tip", "timestamp": 1690202284312428, "duration": 4902,"localEndpoint": { "serviceName": "lightningd"}, "tags": {}, "traceId": "b7f9b1e8af12d252"}] + +``` + +Notice that due to a [limitation][200char] in `bpftrace` you'll at +most get the first 200 bytes of the payload. If you write your own +exporter you'll be able to specify the size of the buffer that is +being used, and can extract the entire span. + +[wp]: https://en.wikipedia.org/wiki/Tracing_(software) +[usdt]: https://illumos.org/books/dtrace/chp-usdt.html +[tether]: https://en.wikipedia.org/wiki/Tether_(cryptocurrency) +[bcc]: https://github.com/iovisor/bcc +[200char]: https://github.com/iovisor/bpftrace/issues/305