From 9cc9ac8ca5e30f10eed5ae837c2b54e90f6fb1ee Mon Sep 17 00:00:00 2001 From: tinboxw Date: Fri, 2 Dec 2022 11:15:06 +0800 Subject: [PATCH] pdump/dumpcap/misserr/ipfrag/mempoolops/meson_options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. app/dumpcap : 保留设备原本状态(混杂模式); 增加file-prefix 参数 2. lib/pdump : 增加多进程支持(当以子进程方式操作tx/rx网卡时,主进程将收到enable/disable_dump后,以广播的形式通知订阅了pdump(action)的子进程enable/disable_dump) 3. lib/mempool/rte_mempool_ops.c 对注册的ops表进行排序处理;当主进程和子进程不是相同bin文件时,初始化ops的顺序可能不一致,会导致mempool->ops_index 不一致,最后引起coredump问题。 myapp和dpdk-pdump便出现了该问题。两个app初始化的 ·rte_mempool_ops_table· 值不一样 4. meson_options.txt add enable_mempool_debug option; will on/off RTE_LIBRTE_MEMPOOL_DEBUG 5. lib/ethdev/rte_ethdev.c 修改rte_eth_dev_start函数,提前eth_dev_fp_ops_setup暴露快速接口。 primary进程中,应该先将快速接口暴露,再启动网卡设备,以防出现启动设备后,接收队列填满,但消费线程还未及时消费的情况(会出现 misserr 的统计错误信息) 6. lib/ip_frag/rte_ip_frag.h lib/ip_frag/rte_ipv4_reassembly.c lib/ip_frag/rte_ipv6_reassembly.c ip分片重组增加接口,将分组片数返回 --- app/dumpcap/main.c | 49 ++++++++++++++++++++++++++----- config/meson.build | 1 + lib/ethdev/rte_ethdev.c | 6 ++-- lib/hash/meson.build | 4 +++ lib/ip_frag/rte_ip_frag.h | 9 ++++++ lib/ip_frag/rte_ipv4_reassembly.c | 13 ++++++-- lib/ip_frag/rte_ipv6_reassembly.c | 15 ++++++++-- lib/mempool/rte_mempool_ops.c | 15 ++++++++++ lib/pdump/rte_pdump.c | 45 ++++++++++++++++++++++++++-- meson_options.txt | 2 ++ 10 files changed, 142 insertions(+), 17 deletions(-) diff --git a/app/dumpcap/main.c b/app/dumpcap/main.c index a6041d4ff49..19c33485f66 100644 --- a/app/dumpcap/main.c +++ b/app/dumpcap/main.c @@ -57,7 +57,9 @@ static bool group_read; static bool quiet; static bool promiscuous_mode = true; static bool use_pcapng = true; + static char *output_name; +static const char *file_prefix = NULL; static const char *filter_str; static unsigned int ring_size = 2048; static const char *capture_comment; @@ -78,6 +80,8 @@ static size_t file_size; struct interface { TAILQ_ENTRY(interface) 
next; uint16_t port; + int old_promiscuous_mode; /* 保留设备原本状态 */ + char name[RTE_ETH_NAME_MAX_LEN]; struct rte_rxtx_callback *rx_cb[RTE_MAX_QUEUES_PER_PORT]; @@ -96,6 +100,9 @@ typedef union { static void usage(void) { printf("Usage: %s [options] ...\n\n", progname); + printf("DPDK args:\n" + " -x , --file-prefix \n" + " Prefix for hugepage filenames\n"); printf("Capture Interface:\n" " -i name or port index of interface\n" " -f packet filter in libpcap filter syntax\n"); @@ -318,12 +325,13 @@ static void parse_opts(int argc, char **argv) { "ring-buffer", required_argument, NULL, 'b' }, { "snapshot-length", required_argument, NULL, 's' }, { "version", no_argument, NULL, 'v' }, + { "file-prefix", required_argument, NULL, 'x' }, { NULL }, }; int option_index, c; for (;;) { - c = getopt_long(argc, argv, "a:b:c:dDf:ghi:nN:pPqs:vw:", + c = getopt_long(argc, argv, "a:b:c:dDf:ghi:nN:pPqs:vw:x:", long_options, &option_index); if (c == -1) break; @@ -389,6 +397,9 @@ static void parse_opts(int argc, char **argv) case 'w': output_name = optarg; break; + case 'x': + file_prefix = optarg; + break; case 'v': printf("%s\n", version()); exit(0); @@ -424,7 +435,7 @@ cleanup_pdump_resources(void) TAILQ_FOREACH(intf, &interfaces, next) { rte_pdump_disable(intf->port, RTE_PDUMP_ALL_QUEUES, RTE_PDUMP_FLAG_RXTX); - if (promiscuous_mode) + if (promiscuous_mode && !intf->old_promiscuous_mode) rte_eth_promiscuous_disable(intf->port); } } @@ -514,9 +525,13 @@ static void dpdk_init(void) "--log-level", "notice" }; - const int eal_argc = RTE_DIM(args); + int eal_argc = RTE_DIM(args); char **eal_argv; - unsigned int i; + int i; + + if(file_prefix != NULL){ + eal_argc += 2; + } /* DPDK API requires mutable versions of command line arguments. 
*/ eal_argv = calloc(eal_argc + 1, sizeof(char *)); @@ -527,6 +542,16 @@ static void dpdk_init(void) for (i = 1; i < RTE_DIM(args); i++) eal_argv[i] = strdup(args[i]); + if(file_prefix != NULL){ + eal_argv[i] = strdup("--file-prefix"); + eal_argv[i + 1] = strdup(file_prefix); + } + /* log the final EAL argv on stderr so stdout stays free of diagnostics */ + for(i = 0; i < eal_argc; ++i){ + fprintf(stderr, "%s ", eal_argv[i]); + } + fprintf(stderr, "\n"); + if (rte_eal_init(eal_argc, eal_argv) < 0) rte_exit(EXIT_FAILURE, "EAL init failed: is primary process running?\n"); @@ -680,6 +705,7 @@ static void enable_pdump(struct rte_ring *r, struct rte_mempool *mp) TAILQ_FOREACH(intf, &interfaces, next) { if (promiscuous_mode) { + intf->old_promiscuous_mode = rte_eth_promiscuous_get(intf->port); ret = rte_eth_promiscuous_enable(intf->port); if (ret != 0) fprintf(stderr, @@ -740,13 +766,14 @@ pcap_write_packets(pcap_dumper_t *dumper, } /* Process all packets in ring and dump to capture file */ -static int process_ring(dumpcap_out_t out, struct rte_ring *r) +static int process_ring(dumpcap_out_t out, struct rte_ring *r, int all) { struct rte_mbuf *pkts[BURST_SIZE]; unsigned int avail, n; static unsigned int empty_count; ssize_t written; +again: n = rte_ring_sc_dequeue_burst(r, (void **) pkts, BURST_SIZE, &avail); if (n == 0) { @@ -772,6 +799,11 @@ static int process_ring(dumpcap_out_t out, struct rte_ring *r) file_size += written; packets_received += n; + + if(all){ + if(avail != 0) goto again; + } + if (!quiet) show_count(packets_received); @@ -813,7 +845,7 @@ int main(int argc, char **argv) } while (!__atomic_load_n(&quit_signal, __ATOMIC_RELAXED)) { - if (process_ring(out, r) < 0) { + if (process_ring(out, r, 0) < 0) { fprintf(stderr, "pcapng file write failed; %s\n", strerror(errno)); break; @@ -830,6 +862,10 @@ int main(int argc, char **argv) break; } + cleanup_pdump_resources(); + + process_ring(out, r, 1); + end_time = create_timestamp(); disable_primary_monitor(); @@ -841,7 +877,6 @@ int main(int argc, char **argv) else pcap_dump_close(out.dumper); -
cleanup_pdump_resources(); rte_free(bpf_filter); rte_ring_free(r); rte_mempool_free(mp); diff --git a/config/meson.build b/config/meson.build index 7f7b6c92fdd..96e471e81c1 100644 --- a/config/meson.build +++ b/config/meson.build @@ -297,6 +297,7 @@ endforeach dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports')) dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet')) dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp')) +dpdk_conf.set('RTE_LIBRTE_MEMPOOL_DEBUG', get_option('enable_mempool_debug')) # values which have defaults which may be overridden dpdk_conf.set('RTE_MAX_VFIO_GROUPS', 64) dpdk_conf.set('RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB', 64) diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index 1979dc08509..446be98d801 100644 --- a/lib/ethdev/rte_ethdev.c +++ b/lib/ethdev/rte_ethdev.c @@ -1471,6 +1471,9 @@ rte_eth_dev_start(uint16_t port_id) if (*dev_info.dev_flags & RTE_ETH_DEV_NOLIVE_MAC_ADDR) eth_dev_mac_restore(dev, &dev_info); + /* expose selection of PMD fast-path functions */ + eth_dev_fp_ops_setup(rte_eth_fp_ops + port_id, dev); + diag = (*dev->dev_ops->dev_start)(dev); if (diag == 0) dev->data->dev_started = 1; @@ -1497,9 +1500,6 @@ rte_eth_dev_start(uint16_t port_id) (*dev->dev_ops->link_update)(dev, 0); } - /* expose selection of PMD fast-path functions */ - eth_dev_fp_ops_setup(rte_eth_fp_ops + port_id, dev); - rte_ethdev_trace_start(port_id); return 0; } diff --git a/lib/hash/meson.build b/lib/hash/meson.build index 2f757d45f9b..4d80e618884 100644 --- a/lib/hash/meson.build +++ b/lib/hash/meson.build @@ -8,6 +8,10 @@ headers = files( 'rte_jhash.h', 'rte_thash.h', 'rte_thash_gfni.h', + + # export the file(rte_cuckoo_hash.h/rte_cmp_x86.h) for exprobe-cli + 'rte_cuckoo_hash.h', + 'rte_cmp_x86.h', ) indirect_headers += files( 'rte_crc_arm64.h', diff --git a/lib/ip_frag/rte_ip_frag.h b/lib/ip_frag/rte_ip_frag.h index 7d2abe116a5..f0ac7b20961 100644 --- a/lib/ip_frag/rte_ip_frag.h +++ b/lib/ip_frag/rte_ip_frag.h 
@@ -128,6 +128,11 @@ struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, struct rte_mbuf *mb, uint64_t tms, struct rte_ipv6_hdr *ip_hdr, struct rte_ipv6_fragment_ext *frag_hdr); +struct rte_mbuf *rte_ipv6_frag_reassemble_packet2(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, + struct rte_mbuf *mb, uint64_t tms, struct rte_ipv6_hdr *ip_hdr, + struct rte_ipv6_fragment_ext *frag_hdr, uint32_t* nb_frags); + /** * Return a pointer to the packet's fragment header, if found. * It only looks at the extension header that's right after the fixed IPv6 @@ -200,6 +205,10 @@ int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, struct rte_ipv4_hdr *ip_hdr); +struct rte_mbuf * rte_ipv4_frag_reassemble_packet2(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, + struct rte_mbuf *mb, uint64_t tms, struct rte_ipv4_hdr *ip_hdr, + uint32_t* nb_frags); /** * Check if the IPv4 packet is fragmented diff --git a/lib/ip_frag/rte_ipv4_reassembly.c b/lib/ip_frag/rte_ipv4_reassembly.c index 4a89a5f5365..bdd3e4dfc53 100644 --- a/lib/ip_frag/rte_ipv4_reassembly.c +++ b/lib/ip_frag/rte_ipv4_reassembly.c @@ -95,9 +95,9 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp) * - not all fragments of the packet are collected yet. */ struct rte_mbuf * -rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, +rte_ipv4_frag_reassemble_packet2(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, - struct rte_ipv4_hdr *ip_hdr) + struct rte_ipv4_hdr *ip_hdr, uint32_t* nb_frags) { struct ip_frag_pkt *fp; struct ip_frag_key key; @@ -157,6 +157,7 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, /* process the fragmented packet. 
*/ mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag); + if(nb_frags) *nb_frags = fp->last_idx; ip_frag_inuse(tbl, fp); IP_FRAG_LOG(DEBUG, "%s:%d:\n" @@ -171,3 +172,11 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, return mb; } + +struct rte_mbuf * +rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, + struct rte_ipv4_hdr *ip_hdr){ + uint32_t nb_frags; + return rte_ipv4_frag_reassemble_packet2(tbl, dr, mb, tms, ip_hdr, &nb_frags); +} \ No newline at end of file diff --git a/lib/ip_frag/rte_ipv6_reassembly.c b/lib/ip_frag/rte_ipv6_reassembly.c index d4019e87e63..9e2d27b0d45 100644 --- a/lib/ip_frag/rte_ipv6_reassembly.c +++ b/lib/ip_frag/rte_ipv6_reassembly.c @@ -134,9 +134,10 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp) #define MORE_FRAGS(x) (((x) & 0x100) >> 8) #define FRAG_OFFSET(x) (rte_cpu_to_be_16(x) >> 3) struct rte_mbuf * -rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, +rte_ipv6_frag_reassemble_packet2(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, - struct rte_ipv6_hdr *ip_hdr, struct rte_ipv6_fragment_ext *frag_hdr) + struct rte_ipv6_hdr *ip_hdr, struct rte_ipv6_fragment_ext *frag_hdr, + uint32_t* nb_frags) { struct ip_frag_pkt *fp; struct ip_frag_key key; @@ -202,6 +203,7 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, /* process the fragmented packet. 
*/ mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, MORE_FRAGS(frag_hdr->frag_data)); + if (nb_frags != NULL) *nb_frags = fp->last_idx; /* nb_frags is optional (may be NULL), as in the IPv4 variant */ ip_frag_inuse(tbl, fp); IP_FRAG_LOG(DEBUG, "%s:%d:\n" @@ -216,3 +218,12 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, return mb; } + +struct rte_mbuf * +rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, + struct rte_ipv6_hdr *ip_hdr, struct rte_ipv6_fragment_ext *frag_hdr){ + uint32_t nb_frags; + return rte_ipv6_frag_reassemble_packet2(tbl, dr, mb, tms, ip_hdr, frag_hdr, &nb_frags); +} + diff --git a/lib/mempool/rte_mempool_ops.c b/lib/mempool/rte_mempool_ops.c index 2d36dee8f0e..e9a7f67040b 100644 --- a/lib/mempool/rte_mempool_ops.c +++ b/lib/mempool/rte_mempool_ops.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -19,6 +20,15 @@ struct rte_mempool_ops_table rte_mempool_ops_table = { .num_ops = 0 }; +static inline int +qsort_cmp_ops(const void *va, const void *vb) +{ + const struct rte_mempool_ops *a = va; + const struct rte_mempool_ops *b = vb; + + return strcmp(a->name, b->name); +} + /* add a new ops struct in rte_mempool_ops_table, return its index.
*/ int rte_mempool_register_ops(const struct rte_mempool_ops *h) @@ -65,6 +75,11 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h) ops->get_info = h->get_info; ops->dequeue_contig_blocks = h->dequeue_contig_blocks; + qsort(rte_mempool_ops_table.ops, + ops_index + 1, /* element count: must include the entry just filled in at ops_index */ + sizeof(rte_mempool_ops_table.ops[0]), + qsort_cmp_ops); /* NOTE(review): sorting can move entries, so ops_index may no longer be the new entry's slot - confirm callers ignore the return value */ + rte_spinlock_unlock(&rte_mempool_ops_table.sl); return ops_index; diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c index b3a62df5916..69f7c699d58 100644 --- a/lib/pdump/rte_pdump.c +++ b/lib/pdump/rte_pdump.c @@ -385,6 +385,37 @@ set_pdump_rxtx_cbs(const struct pdump_request *p) return ret; } +static int +broadcast_secondary_replay(__rte_unused const struct rte_mp_msg *request, + const struct rte_mp_reply *reply) +{ /* async reply callback: logs every received reply whose err_value is non-zero; always returns 0 */ + struct rte_mp_msg *mp_rep; + struct pdump_response *resp; + + // ignore error... + for(int i = 0; i < reply->nb_received; ++i){ + mp_rep = &reply->msgs[i]; + resp = (struct pdump_response *)mp_rep->param; + //rte_errno = resp->err_value; + if (resp->err_value) + PDUMP_LOG(ERR, "failed to recv from client when call brodcast_secondary_replay(ignore error), err:%d,%s\n", + resp->err_value, rte_strerror(-resp->err_value)); + } + + return 0; +} + +static void +broadcast_secondary(const struct rte_mp_msg *mp_msg) +{ /* re-sends a copy of mp_msg as an async multi-process request with a 5 s timeout */ + struct rte_mp_msg mp_req; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + memcpy(&mp_req, mp_msg, sizeof(mp_req)); + + // ignore error + rte_mp_request_async(&mp_req, &ts, broadcast_secondary_replay); +} + static int pdump_server(const struct rte_mp_msg *mp_msg, const void *peer) { @@ -401,6 +432,11 @@ pdump_server(const struct rte_mp_msg *mp_msg, const void *peer) resp->ver = cli_req->ver; resp->res_op = cli_req->op; resp->err_value = set_pdump_rxtx_cbs(cli_req); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY){ + // broadcast every secondary process + broadcast_secondary(mp_msg); + } } rte_strscpy(mp_resp.name, PDUMP_MP, RTE_MP_MAX_NAME_LEN); @@ -424,9 +460,12 @@
rte_pdump_init(void) mz = rte_memzone_reserve(MZ_RTE_PDUMP_STATS, sizeof(*pdump_stats), rte_socket_id(), 0); if (mz == NULL) { - PDUMP_LOG(ERR, "cannot allocate pdump statistics\n"); - rte_errno = ENOMEM; - return -1; + mz = rte_memzone_lookup(MZ_RTE_PDUMP_STATS); + if (mz == NULL){ + PDUMP_LOG(ERR, "cannot allocate pdump statistics\n"); + rte_errno = ENOMEM; + return -1; + } } pdump_stats = mz->addr; pdump_stats->mz = mz; diff --git a/meson_options.txt b/meson_options.txt index 7c220ad68db..1198338a4c1 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -44,6 +44,8 @@ option('platform', type: 'string', value: 'native', description: 'Platform to build, either "native", "generic" or a SoC. Please refer to the Linux build guide for more information.') option('enable_trace_fp', type: 'boolean', value: false, description: 'enable fast path trace points.') +option('enable_mempool_debug', type: 'boolean', value: false, description: + 'enable mempool debug.') option('tests', type: 'boolean', value: true, description: 'build unit tests') option('use_hpet', type: 'boolean', value: false, description: