diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h index 828a7309d82..4c34dcd8aab 100644 --- a/app/test-compress-perf/comp_perf_options.h +++ b/app/test-compress-perf/comp_perf_options.h @@ -21,6 +21,7 @@ enum cleanup_st { ST_TEST_DATA, ST_COMPDEV, ST_INPUT_DATA, + ST_DICTIONARY_DATA, ST_MEMORY_ALLOC, ST_DURING_TEST }; @@ -48,10 +49,13 @@ struct range_list { struct comp_test_data { char driver_name[RTE_DEV_NAME_MAX_LEN]; char input_file[PATH_MAX]; + char dictionary_file[PATH_MAX]; enum cperf_test_type test; uint8_t *input_data; size_t input_data_sz; + uint8_t *dictionary_data; + size_t dictionary_data_sz; uint16_t nb_qps; uint16_t seg_sz; uint16_t out_seg_sz; diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c index 6d8c370fc2e..49197dc2900 100644 --- a/app/test-compress-perf/comp_perf_options_parse.c +++ b/app/test-compress-perf/comp_perf_options_parse.c @@ -31,6 +31,7 @@ #define CPERF_LEVEL ("compress-level") #define CPERF_WINDOW_SIZE ("window-sz") #define CPERF_EXTERNAL_MBUFS ("external-mbufs") +#define CPERF_DICTIONARY ("dictionary") /* cyclecount-specific options */ #define CPERF_CYCLECOUNT_DELAY_US ("cc-delay-us") @@ -71,6 +72,7 @@ usage(char *progname) " keeping the data directly in mbuf area\n" " --cc-delay-us N: delay between enqueue and dequeue operations in microseconds\n" " valid only for cyclecount perf test (default: 500 us)\n" + " --dictionary NAME: file with dictionary\n" " -h: prints this help\n", progname); } @@ -609,6 +611,17 @@ parse_external_mbufs(struct comp_test_data *test_data, return 0; } +static int +parse_dictionary_file(struct comp_test_data *test_data, const char *arg) +{ + if (strlen(arg) > (sizeof(test_data->dictionary_file) - 1)) + return -1; + + strlcpy(test_data->dictionary_file, arg, sizeof(test_data->dictionary_file)); + + return 0; +} + static int parse_cyclecount_delay_us(struct comp_test_data *test_data, const char *arg) @@ -647,6 +660,7 @@ static struct option lgopts[] = { { CPERF_LEVEL, required_argument, 0, 0 }, { CPERF_WINDOW_SIZE, required_argument, 0, 0 }, { CPERF_EXTERNAL_MBUFS, 0, 0, 0 }, + { CPERF_DICTIONARY, required_argument, 0, 0 }, { CPERF_CYCLECOUNT_DELAY_US, required_argument, 0, 0 }, { NULL, 0, 0, 0 } }; @@ -671,6 +685,7 @@ comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data) { CPERF_LEVEL, parse_level }, { CPERF_WINDOW_SIZE, parse_window_sz }, { CPERF_EXTERNAL_MBUFS, parse_external_mbufs }, + { CPERF_DICTIONARY, parse_dictionary_file }, { CPERF_CYCLECOUNT_DELAY_US, parse_cyclecount_delay_us }, }; unsigned int i; diff --git a/app/test-compress-perf/comp_perf_test_verify.c b/app/test-compress-perf/comp_perf_test_verify.c index 09d97c5cf77..ab74396d09d 100644 --- a/app/test-compress-perf/comp_perf_test_verify.c +++ b/app/test-compress-perf/comp_perf_test_verify.c @@ -64,6 +64,8 @@ main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type) int res = 0; int allocated = 0; uint32_t out_seg_sz; + uint8_t dict[DEFLATE_MAX_WINDOW_SIZE] = {0}; + uint16_t window_size = (1ULL << test_data->window_sz); if (test_data == NULL || !test_data->burst_sz) { RTE_LOG(ERR, USER1, @@ -71,6 +73,20 @@ main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type) return -1; } + if (test_data->dictionary_data) { + if (test_data->dictionary_data_sz >= window_size) { + memcpy(dict, + test_data->dictionary_data + + (test_data->dictionary_data_sz - window_size), + window_size); + } else if 
(test_data->dictionary_data_sz < window_size) { + memcpy(dict + (window_size - test_data->dictionary_data_sz), + test_data->dictionary_data, + test_data->dictionary_data_sz); + } + } + + ops = rte_zmalloc_socket(NULL, 2 * mem->total_bufs * sizeof(struct rte_comp_op *), 0, rte_socket_id()); @@ -91,12 +107,14 @@ main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type) .level = test_data->level, .window_size = test_data->window_sz, .chksum = RTE_COMP_CHECKSUM_NONE, - .hash_algo = RTE_COMP_HASH_ALGO_NONE + .hash_algo = RTE_COMP_HASH_ALGO_NONE, } }; - if (test_data->test_algo == RTE_COMP_ALGO_DEFLATE) + if (test_data->test_algo == RTE_COMP_ALGO_DEFLATE) { xform.compress.deflate.huffman = test_data->huffman_enc; - else if (test_data->test_algo == RTE_COMP_ALGO_LZ4) + xform.compress.deflate.dictionary = dict; + xform.compress.deflate.dictionary_len = window_size; + } else if (test_data->test_algo == RTE_COMP_ALGO_LZ4) xform.compress.lz4.flags = test_data->lz4_flags; output_data_ptr = ctx->mem.compressed_data; output_data_sz = &ctx->comp_data_sz; @@ -113,7 +131,10 @@ main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type) .hash_algo = RTE_COMP_HASH_ALGO_NONE } }; - if (test_data->test_algo == RTE_COMP_ALGO_LZ4) + if (test_data->test_algo == RTE_COMP_ALGO_DEFLATE) { + xform.decompress.inflate.dictionary = dict; + xform.decompress.inflate.dictionary_len = window_size; + } else if (test_data->test_algo == RTE_COMP_ALGO_LZ4) xform.decompress.lz4.flags = test_data->lz4_flags; output_data_ptr = ctx->mem.decompressed_data; output_data_sz = &ctx->decomp_data_sz; @@ -194,7 +215,17 @@ main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type) rte_pktmbuf_pkt_len(input_bufs[buf_id]); ops[op_id]->dst.offset = 0; ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL; - ops[op_id]->input_chksum = buf_id; + if ((xform.type == RTE_COMP_DECOMPRESS) && + (xform.decompress.chksum + == RTE_COMP_CHECKSUM_3GPP_PDCP_UDC)) { + uint8_t *udc_header + = rte_pktmbuf_mtod(ops[op_id]->m_src, uint8_t *); + ops[op_id]->input_chksum = *udc_header & 0xf; + ops[op_id]->src.offset = 1; + } else { + ops[op_id]->input_chksum = buf_id; + ops[op_id]->src.offset = 0; + } ops[op_id]->private_xform = priv_xform; } diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c index 70ce4316cc3..e61b246fd92 100644 --- a/app/test-compress-perf/main.c +++ b/app/test-compress-perf/main.c @@ -335,6 +335,89 @@ comp_perf_dump_input_data(struct comp_test_data *test_data) return ret; } +static int +comp_perf_dump_dictionary_data(struct comp_test_data *test_data) +{ + FILE *f = fopen(test_data->dictionary_file, "r"); + int ret = -1; + + if (f == NULL) { + RTE_LOG(ERR, USER1, "Dictionary file not specified\n"); + test_data->dictionary_data_sz = 0; + test_data->dictionary_data = NULL; + ret = 0; + goto end; + } + + if (fseek(f, 0, SEEK_END) != 0) { + RTE_LOG(ERR, USER1, "Size of input could not be calculated\n"); + goto end; + } + size_t actual_file_sz = ftell(f); + /* If extended input data size has not been set, + * input data size = file size + */ + + if (test_data->dictionary_data_sz == 0) + test_data->dictionary_data_sz = actual_file_sz; + + if (test_data->dictionary_data_sz <= 0 || actual_file_sz <= 0 || + fseek(f, 0, SEEK_SET) != 0) { + RTE_LOG(ERR, USER1, "Size of input could not be calculated\n"); + goto end; + } + + test_data->dictionary_data = rte_zmalloc_socket(NULL, + test_data->dictionary_data_sz, 0, rte_socket_id()); + + if (test_data->dictionary_data == NULL) { + RTE_LOG(ERR, USER1, "Memory 
to hold the data from the dictionary " "file could not be allocated\n"); + goto end; + } + + size_t remaining_data = test_data->dictionary_data_sz; + uint8_t *data = test_data->dictionary_data; + + while (remaining_data > 0) { + size_t data_to_read = RTE_MIN(remaining_data, actual_file_sz); + + if (fread(data, data_to_read, 1, f) != 1) { + RTE_LOG(ERR, USER1, "Dictionary file could not be read\n"); + goto end; + } + if (fseek(f, 0, SEEK_SET) != 0) { + RTE_LOG(ERR, USER1, + "Dictionary file could not be rewound\n"); + goto end; + } + remaining_data -= data_to_read; + data += data_to_read; + } + + printf("\n"); + if (test_data->dictionary_data_sz > actual_file_sz) + RTE_LOG(INFO, USER1, + "%zu bytes read from file %s, extending the file %.2f times\n", + test_data->dictionary_data_sz, test_data->dictionary_file, + (double)test_data->dictionary_data_sz/actual_file_sz); + else + RTE_LOG(INFO, USER1, + "%zu bytes read from file %s\n", + test_data->dictionary_data_sz, test_data->dictionary_file); + + ret = 0; + +end: + if (f) + fclose(f); + + /* the dictionary is consumed later by the tests; free it only on failure */ + if (ret < 0) { + rte_free(test_data->dictionary_data); + test_data->dictionary_data = NULL; + } + + return ret; +} + static void comp_perf_cleanup_on_signal(int signalNumber __rte_unused) { @@ -407,6 +490,13 @@ main(int argc, char **argv) } test_data->cleanup = ST_INPUT_DATA; + if (comp_perf_dump_dictionary_data(test_data) < 0) { + ret = EXIT_FAILURE; + goto end; + } + + test_data->cleanup = ST_DICTIONARY_DATA; + if (test_data->level_lst.inc != 0) test_data->level = test_data->level_lst.min; @@ -496,6 +586,9 @@ main(int argc, char **argv) i++; } /* fallthrough */ + case ST_DICTIONARY_DATA: + rte_free(test_data->dictionary_data); + /* fallthrough */ case ST_INPUT_DATA: rte_free(test_data->input_data); /* fallthrough */ diff --git a/buildtools/chkincs/meson.build b/buildtools/chkincs/meson.build index 49dbc552541..9a84c1cc252 100644 --- a/buildtools/chkincs/meson.build +++ b/buildtools/chkincs/meson.build @@ -6,27 +6,26 @@ if not get_option('check_includes') subdir_done() endif +includes = [config_inc] +deps = [] +includes_drivers = [] +deps_drivers = [] + +subdir('staging') + gen_c_file_for_header = find_program('gen_c_file_for_header.py') gen_c_files = generator(gen_c_file_for_header, output: '@BASENAME@.c', arguments: ['@INPUT@', '@OUTPUT@']) cflags = machine_args +if cc.has_argument('-Wno-missing-field-initializers') + cflags += '-Wno-missing-field-initializers' +endif cflags += no_wvla_cflag sources = files('main.c') -sources += gen_c_files.process(dpdk_chkinc_headers) - -# some driver SDK headers depend on these two buses, which are mandatory in build -# so we always include them in deps list -deps = [get_variable('shared_rte_bus_vdev'), get_variable('shared_rte_bus_pci')] -if dpdk_conf.has('RTE_BUS_VMBUS') - deps += get_variable('shared_rte_bus_vmbus') -endif -# add the rest of the libs to the dependencies -foreach l:dpdk_libs_enabled - deps += get_variable('shared_rte_' + l) -endforeach +sources += gen_c_files.process(dpdk_arch_headers + dpdk_headers) executable('chkincs', sources, c_args: cflags, @@ -46,6 +45,27 @@ executable('chkincs-all', sources, dependencies: deps, install: false) +sources_drivers = files('main.c') +sources_drivers += gen_c_files.process(dpdk_drivers_headers) + +executable('chkincs-drv', sources_drivers, + c_args: cflags, + include_directories: includes + includes_drivers, + dependencies: deps + deps_drivers, + install: false) + +executable('chkincs-drv-exp', sources_drivers, + c_args: [cflags, '-DALLOW_EXPERIMENTAL_API'], + include_directories: includes + 
includes_drivers, + dependencies: deps + deps_drivers, + install: false) + +executable('chkincs-drv-all', sources_drivers, + c_args: [cflags, '-DALLOW_EXPERIMENTAL_API', '-DALLOW_INTERNAL_API'], + include_directories: includes + includes_drivers, + dependencies: deps + deps_drivers, + install: false) + # run tests for c++ builds also if not add_languages('cpp', required: false) subdir_done() @@ -56,7 +76,7 @@ gen_cpp_files = generator(gen_c_file_for_header, arguments: ['@INPUT@', '@OUTPUT@']) cpp_sources = files('main.cpp') -cpp_sources += gen_cpp_files.process(dpdk_chkinc_headers) +cpp_sources += gen_cpp_files.process(dpdk_arch_headers + dpdk_headers) executable('chkincs-cpp', cpp_sources, cpp_args: ['-include', 'rte_config.h', cflags], @@ -76,3 +96,25 @@ executable('chkincs-cpp-all', cpp_sources, include_directories: includes, dependencies: deps, install: false) + +cpp_sources_drivers = files('main.cpp') +cpp_sources_drivers += gen_cpp_files.process(dpdk_drivers_headers) + +executable('chkincs-drv-cpp', cpp_sources_drivers, + cpp_args: ['-include', 'rte_config.h', cflags], + include_directories: includes + includes_drivers, + dependencies: deps + deps_drivers, + install: false) + +executable('chkincs-drv-cpp-exp', cpp_sources_drivers, + cpp_args: ['-include', 'rte_config.h', cflags, '-DALLOW_EXPERIMENTAL_API'], + include_directories: includes + includes_drivers, + dependencies: deps + deps_drivers, + install: false) + +executable('chkincs-drv-cpp-all', cpp_sources_drivers, + cpp_args: ['-include', 'rte_config.h', cflags, '-DALLOW_EXPERIMENTAL_API', + '-DALLOW_INTERNAL_API'], + include_directories: includes + includes_drivers, + dependencies: deps + deps_drivers, + install: false) diff --git a/buildtools/chkincs/staging/drivers/meson.build b/buildtools/chkincs/staging/drivers/meson.build new file mode 100644 index 00000000000..dc859e6dfd7 --- /dev/null +++ b/buildtools/chkincs/staging/drivers/meson.build @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2025 Red Hat, Inc. + +includes_drivers += include_directories('.') +deps_drivers += declare_dependency(sources: + custom_target('drivers_headers_staging', + output: 'drivers_headers_staging.stamp', + depends: cleanup_target, + command: [stage_headers_cmd, meson.current_build_dir(), '@OUTPUT@', + dpdk_drivers_headers], + install: false, + ) +) diff --git a/buildtools/chkincs/staging/meson.build b/buildtools/chkincs/staging/meson.build new file mode 100644 index 00000000000..8710df290b0 --- /dev/null +++ b/buildtools/chkincs/staging/meson.build @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2025 Red Hat, Inc. 
+ +stage_headers_cmd = py3 + files('stage-headers.py') + +cleanup_target = custom_target('cleanup_staging', + output: 'cleanup_staging.stamp', + depend_files: dpdk_arch_headers + dpdk_arch_indirect_headers + dpdk_generic_headers + + dpdk_headers + dpdk_indirect_headers + dpdk_drivers_headers, + command: [stage_headers_cmd, '--cleanup', meson.current_build_dir(), '@OUTPUT@'], + install: false, +) + +subdir('drivers') + +includes += include_directories('.') +deps += declare_dependency(sources: + custom_target('arch_headers_staging', + output: 'arch_headers_staging.stamp', + depends: cleanup_target, + command: [stage_headers_cmd, meson.current_build_dir(), '@OUTPUT@', + dpdk_arch_headers + dpdk_arch_indirect_headers], + install: false, + ) +) +deps += declare_dependency(sources: + custom_target('headers_staging', + output: 'headers_staging.stamp', + depends: cleanup_target, + command: [stage_headers_cmd, meson.current_build_dir(), '@OUTPUT@', + dpdk_headers + dpdk_indirect_headers], + install: false, + ) +) +deps += declare_dependency(sources: + custom_target('generic_headers_staging', + output: 'generic_headers_staging.stamp', + depends: cleanup_target, + command: [stage_headers_cmd, join_paths(meson.current_build_dir(), 'generic'), + '@OUTPUT@', dpdk_generic_headers], + install: false, + ) +) diff --git a/buildtools/chkincs/staging/stage-headers.py b/buildtools/chkincs/staging/stage-headers.py new file mode 100755 index 00000000000..3747dc3363c --- /dev/null +++ b/buildtools/chkincs/staging/stage-headers.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2025 Red Hat, Inc. + +""" +Headers staging script for DPDK build system. +""" + +import sys +import os +import shutil +from pathlib import Path + +def main(): + """ + Stage header files into a staging directory or clean the staging directory. + + When invoked with "--cleanup <staging dir> <stamp file>", removes and recreates the staging directory. + Otherwise, creates the staging directory (if needed) and copies the listed header files into it (preserving filenames and metadata). + + Usage: + stage-headers.py [--cleanup] <staging dir> <stamp file> [headers...] 
+ """ + + if len(sys.argv) < 4: + print("Usage: stage-headers.py [--cleanup] <staging dir> <stamp file> [headers...]") + sys.exit(1) + + if len(sys.argv) == 4 and sys.argv[1] == '--cleanup': + staging_dir = Path(sys.argv[2]) + meson_stamp = Path(sys.argv[3]) + + shutil.rmtree(staging_dir) + staging_dir.mkdir(parents=True, exist_ok=True) + + else: + staging_dir = Path(sys.argv[1]) + meson_stamp = Path(sys.argv[2]) + headers = sys.argv[3:] + + staging_dir.mkdir(parents=True, exist_ok=True) + + for header in headers: + file = Path(header) + shutil.copy2(file, staging_dir / file.name) + + meson_stamp.touch() + +if __name__ == "__main__": + main() diff --git a/config/meson.build b/config/meson.build index 55497f0bf5a..ca63ee10b19 100644 --- a/config/meson.build +++ b/config/meson.build @@ -208,7 +208,8 @@ if not is_windows endif # on some OS, maths functions are in a separate library -if cc.find_library('m', required : false).found() +math_dep = cc.find_library('m', required : false) +if math_dep.found() and cc.links(min_c_code, dependencies: math_dep) # some libs depend on maths lib add_project_link_arguments('-lm', language: 'c') dpdk_extra_ldflags += '-lm' @@ -235,7 +236,7 @@ if meson.is_cross_build() and not meson.get_external_property('numa', true) endif if find_libnuma numa_dep = cc.find_library('numa', required: false) - if numa_dep.found() and cc.has_header('numaif.h') + if numa_dep.found() and cc.has_header('numaif.h') and cc.links(min_c_code, dependencies: numa_dep) dpdk_conf.set10('RTE_HAS_LIBNUMA', true) has_libnuma = true add_project_link_arguments('-lnuma', language: 'c') @@ -245,7 +246,7 @@ endif has_libfdt = false fdt_dep = cc.find_library('fdt', required: false) -if fdt_dep.found() and cc.has_header('fdt.h') +if fdt_dep.found() and cc.has_header('fdt.h') and cc.links(min_c_code, dependencies: fdt_dep) dpdk_conf.set10('RTE_HAS_LIBFDT', true) has_libfdt = true add_project_link_arguments('-lfdt', language: 'c') @@ -253,7 +254,7 @@ if fdt_dep.found() and cc.has_header('fdt.h') endif libexecinfo = cc.find_library('execinfo', required: false) -if libexecinfo.found() +if libexecinfo.found() and cc.links(min_c_code, dependencies: libexecinfo) add_project_link_arguments('-lexecinfo', language: 'c') dpdk_extra_ldflags += '-lexecinfo' endif @@ -288,7 +289,8 @@ if not pcap_dep.found() # pcap got a pkg-config file only in 1.9.0 pcap_dep = cc.find_library(pcap_lib, required: false) endif -if pcap_dep.found() and cc.has_header('pcap.h', dependencies: pcap_dep) +if (pcap_dep.found() and cc.has_header('pcap.h', dependencies: pcap_dep) + and cc.links(min_c_code, dependencies: pcap_dep)) dpdk_conf.set('RTE_HAS_LIBPCAP', 1) dpdk_extra_ldflags += '-l@0@'.format(pcap_lib) endif @@ -296,6 +298,9 @@ endif # for clang 32-bit compiles we need libatomic for 64-bit atomic ops if cc.get_id() == 'clang' and dpdk_conf.get('RTE_ARCH_64') == false atomic_dep = cc.find_library('atomic', required: true) + if not cc.links(min_c_code, dependencies: atomic_dep) + error('broken dependency, "libatomic"') + endif add_project_link_arguments('-latomic', language: 'c') dpdk_extra_ldflags += '-latomic' endif @@ -456,11 +461,7 @@ endif # set the install path for the drivers dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path) -install_headers(['rte_config.h'], - subdir: get_option('include_subdir_arch')) - -# enable VFIO only if it is linux OS -dpdk_conf.set('RTE_EAL_VFIO', is_linux) +dpdk_arch_headers += files('rte_config.h') # specify -D_GNU_SOURCE unconditionally add_project_arguments('-D_GNU_SOURCE', language: 'c') diff --git 
a/devtools/checkpatches.sh b/devtools/checkpatches.sh index 35037aa8f10..9fb8fd0a070 100755 --- a/devtools/checkpatches.sh +++ b/devtools/checkpatches.sh @@ -87,6 +87,14 @@ check_forbidden_additions() { # -f $(dirname $(readlink -f $0))/check-forbidden-tokens.awk \ "$1" || res=1 + # don't call install_headers() directly + awk -v FOLDERS="lib drivers" \ + -v EXPRESSIONS="\\<install_headers\\>" \ + -v RET_ON_FAIL=1 \ + -v MESSAGE='Using install_headers()' \ + -f $(dirname $(readlink -f $0))/check-forbidden-tokens.awk \ + "$1" || res=1 + # refrain from using compiler attribute without defining a common macro awk -v FOLDERS="lib drivers app examples" \ -v SKIP_FILES='lib/eal/include/rte_common.h' \ diff --git a/devtools/linux-uapi.sh b/devtools/linux-uapi.sh index c09c43ab6bb..dbf3163bc83 100755 --- a/devtools/linux-uapi.sh +++ b/devtools/linux-uapi.sh @@ -81,6 +81,9 @@ fixup_includes() sed -i "s|^#include <linux/compiler.h>||g" $path sed -i "s|\<__user[[:space:]]||" $path + sed -i 's|#\(ifndef\)[[:space:]]*_UAPI|#\1 |' $path + sed -i 's|#\(define\)[[:space:]]*_UAPI|#\1 |' $path + sed -i 's|#\(endif[[:space:]]*/[*]\)[[:space:]]*_UAPI|#\1 |' $path # Prepend include path with "uapi/" if the header is imported for include in $(sed -ne 's/^#include <\(.*\)>$/\1/p' $path); do diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in index 28cec3232f5..bedd9446815 100644 --- a/doc/api/doxy-api.conf.in +++ b/doc/api/doxy-api.conf.in @@ -92,7 +92,6 @@ FILE_PATTERNS = rte_*.h \ PREDEFINED = __DOXYGEN__ \ RTE_ATOMIC \ RTE_HAS_CPUSET \ - VFIO_PRESENT \ __rte_lockable= \ __rte_guarded_by(x)= \ __rte_exclusive_locks_required(x)= \ diff --git a/doc/guides/contributing/linux_uapi.rst b/doc/guides/contributing/linux_uapi.rst index 79bedb478ec..a1bc0ba604a 100644 --- a/doc/guides/contributing/linux_uapi.rst +++ b/doc/guides/contributing/linux_uapi.rst @@ -62,9 +62,15 @@ Header inclusion into library or driver The library or driver willing to make use of imported uAPI headers needs to explicitly include the header file with ``uapi/`` prefix in C files. + +This inclusion must be done before any header external to DPDK is included, +to prevent the system uAPI header from being included by any of those external headers. + For example to include VDUSE uAPI: .. code-block:: c #include <uapi/linux/vduse.h> + #include + ... diff --git a/doc/guides/cryptodevs/bcmfs.rst b/doc/guides/cryptodevs/bcmfs.rst index d18a2539135..049808a49c2 100644 --- a/doc/guides/cryptodevs/bcmfs.rst +++ b/doc/guides/cryptodevs/bcmfs.rst @@ -59,10 +59,6 @@ Information about kernel, rootfs and toolchain can be found at `Broadcom Official Website `__. - .. Note:: - To execute BCMFS PMD, it must be compiled with VFIO_PRESENT flag on the - compiling platform and same gets enabled in rte_vfio.h. - The BCMFS PMD may be compiled natively on a Stingray platform or cross-compiled on an x86 platform. For example, below commands can be executed for cross compiling on x86 platform. diff --git a/doc/guides/nics/features/iavf.ini b/doc/guides/nics/features/iavf.ini index 61c4742197b..7695e3ff7c5 100644 --- a/doc/guides/nics/features/iavf.ini +++ b/doc/guides/nics/features/iavf.ini @@ -7,12 +7,13 @@ ; is selected. 
; [Features] -Speed capabilities = Y Link status = Y Rx interrupt = Y +Free Tx mbuf on demand = Y Queue start/stop = Y Runtime Rx queue setup = Y Runtime Tx queue setup = Y +Burst mode info = Y Power mgmt address monitor = Y MTU update = Y Scattered Rx = Y @@ -39,6 +40,7 @@ Packet type parsing = Y Rx descriptor status = Y Tx descriptor status = Y Basic stats = Y +Extended stats = Y Multiprocess aware = Y FreeBSD = Y Linux = Y diff --git a/doc/guides/nics/features/ice.ini b/doc/guides/nics/features/ice.ini index 9c8569740ad..27034935145 100644 --- a/doc/guides/nics/features/ice.ini +++ b/doc/guides/nics/features/ice.ini @@ -14,6 +14,7 @@ Link status event = Y FEC = Y Rx interrupt = Y Fast mbuf free = P +Free Tx mbuf on demand = Y Queue start/stop = Y Burst mode info = Y Power mgmt address monitor = Y diff --git a/doc/guides/tools/comp_perf.rst b/doc/guides/tools/comp_perf.rst index 4299ef0b3f8..f0c6993a2d8 100644 --- a/doc/guides/tools/comp_perf.rst +++ b/doc/guides/tools/comp_perf.rst @@ -98,6 +98,8 @@ Application Options ``--cc-delay-us N``: delay between enqueue and dequeue operations in microseconds, valid only for the cyclecount test (default: 500 us) + ``--dictionary NAME``: file containing pre-defined dictionary + ``-h``: prints this help diff --git a/drivers/baseband/acc/meson.build b/drivers/baseband/acc/meson.build index 64fcf1537af..9278897f76f 100644 --- a/drivers/baseband/acc/meson.build +++ b/drivers/baseband/acc/meson.build @@ -5,11 +5,31 @@ dep_dec5g = dependency('flexran_sdk_ldpc_decoder_5gnr', required: false) if dep_dec5g.found() - ext_deps += cc.find_library('stdc++', required: true) - ext_deps += cc.find_library('irc', required: true) - ext_deps += cc.find_library('imf', required: true) - ext_deps += cc.find_library('ipps', required: true) - ext_deps += cc.find_library('svml', required: true) + stdcpp_dep = cc.find_library('stdc++', required: true) + if not cc.links(min_c_code, dependencies: stdcpp_dep) + error('broken dependency, "libstdc++"') + endif + ext_deps += stdcpp_dep + irc_dep = cc.find_library('irc', required: true) + if not cc.links(min_c_code, dependencies: irc_dep) + error('broken dependency, "libirc"') + endif + ext_deps += irc_dep + imf_dep = cc.find_library('imf', required: true) + if not cc.links(min_c_code, dependencies: imf_dep) + error('broken dependency, "libimf"') + endif + ext_deps += imf_dep + ipps_dep = cc.find_library('ipps', required: true) + if not cc.links(min_c_code, dependencies: ipps_dep) + error('broken dependency, "libipps"') + endif + ext_deps += ipps_dep + svml_dep = cc.find_library('svml', required: true) + if not cc.links(min_c_code, dependencies: svml_dep) + error('broken dependency, "libsvml"') + endif + ext_deps += svml_dep ext_deps += dep_dec5g ext_deps += dependency('flexran_sdk_ldpc_encoder_5gnr', required: true) ext_deps += dependency('flexran_sdk_LDPC_ratematch_5gnr', required: true) @@ -26,4 +46,4 @@ deps += ['bus_pci'] sources = files('acc_common.c', 'rte_acc100_pmd.c', 'rte_vrb_pmd.c') -headers = files('rte_acc_cfg.h') +headers = files('rte_acc_cfg.h', 'rte_acc_common_cfg.h') diff --git a/drivers/baseband/turbo_sw/meson.build b/drivers/baseband/turbo_sw/meson.build index a9035a753e9..5fd5e41c694 100644 --- a/drivers/baseband/turbo_sw/meson.build +++ b/drivers/baseband/turbo_sw/meson.build @@ -6,11 +6,31 @@ dep_turbo = dependency('flexran_sdk_turbo', required: false) dep_dec5g = dependency('flexran_sdk_ldpc_decoder_5gnr', required: false) if dep_turbo.found() - ext_deps += cc.find_library('stdc++', required: true) - 
ext_deps += cc.find_library('irc', required: true) - ext_deps += cc.find_library('imf', required: true) - ext_deps += cc.find_library('ipps', required: true) - ext_deps += cc.find_library('svml', required: true) + stdcpp_dep = cc.find_library('stdc++', required: true) + if not cc.links(min_c_code, dependencies: stdcpp_dep) + error('broken dependency, "libstdc++"') + endif + ext_deps += stdcpp_dep + irc_dep = cc.find_library('irc', required: true) + if not cc.links(min_c_code, dependencies: irc_dep) + error('broken dependency, "libirc"') + endif + ext_deps += irc_dep + imf_dep = cc.find_library('imf', required: true) + if not cc.links(min_c_code, dependencies: imf_dep) + error('broken dependency, "libimf"') + endif + ext_deps += imf_dep + ipps_dep = cc.find_library('ipps', required: true) + if not cc.links(min_c_code, dependencies: ipps_dep) + error('broken dependency, "libipps"') + endif + ext_deps += ipps_dep + svml_dep = cc.find_library('svml', required: true) + if not cc.links(min_c_code, dependencies: svml_dep) + error('broken dependency, "libsvml"') + endif + ext_deps += svml_dep ext_deps += dep_turbo ext_deps += dependency('flexran_sdk_crc', required: true) ext_deps += dependency('flexran_sdk_rate_matching', required: true) diff --git a/drivers/bus/cdx/cdx_vfio.c b/drivers/bus/cdx/cdx_vfio.c index 37e0c424d4f..c03bc01df7c 100644 --- a/drivers/bus/cdx/cdx_vfio.c +++ b/drivers/bus/cdx/cdx_vfio.c @@ -11,12 +11,15 @@ * */ +#include + #include #include #include #include #include #include + #include #include #include @@ -638,9 +641,9 @@ rte_cdx_vfio_bm_enable(struct rte_cdx_device *dev) feature->argsz = argsz; - feature->flags = RTE_VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_PROBE; + feature->flags = VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_PROBE; feature->flags |= VFIO_DEVICE_FEATURE_SET; - ret = ioctl(vfio_dev_fd, RTE_VFIO_DEVICE_FEATURE, feature); + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_FEATURE, feature); if (ret) { CDX_BUS_ERR("Bus Master configuring not supported for device: %s, error: %d (%s)", dev->name, errno, strerror(errno)); @@ -648,9 +651,9 @@ rte_cdx_vfio_bm_enable(struct rte_cdx_device *dev) return ret; } - feature->flags = RTE_VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_SET; + feature->flags = VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_SET; vfio_bm_feature->op = VFIO_DEVICE_FEATURE_SET_MASTER; - ret = ioctl(vfio_dev_fd, RTE_VFIO_DEVICE_FEATURE, feature); + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_FEATURE, feature); if (ret < 0) CDX_BUS_ERR("BM Enable Error for device: %s, Error: %d (%s)", dev->name, errno, strerror(errno)); @@ -683,9 +686,9 @@ rte_cdx_vfio_bm_disable(struct rte_cdx_device *dev) feature->argsz = argsz; - feature->flags = RTE_VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_PROBE; + feature->flags = VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_PROBE; feature->flags |= VFIO_DEVICE_FEATURE_SET; - ret = ioctl(vfio_dev_fd, RTE_VFIO_DEVICE_FEATURE, feature); + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_FEATURE, feature); if (ret) { CDX_BUS_ERR("Bus Master configuring not supported for device: %s, Error: %d (%s)", dev->name, errno, strerror(errno)); @@ -693,9 +696,9 @@ rte_cdx_vfio_bm_disable(struct rte_cdx_device *dev) return ret; } - feature->flags = RTE_VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_SET; + feature->flags = VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_SET; vfio_bm_feature->op = VFIO_DEVICE_FEATURE_CLEAR_MASTER; - ret = ioctl(vfio_dev_fd, RTE_VFIO_DEVICE_FEATURE, feature); + ret = 
ioctl(vfio_dev_fd, VFIO_DEVICE_FEATURE, feature); if (ret < 0) CDX_BUS_ERR("BM Disable Error for device: %s, Error: %d (%s)", dev->name, errno, strerror(errno)); diff --git a/drivers/bus/fslmc/bus_fslmc_driver.h b/drivers/bus/fslmc/bus_fslmc_driver.h index 442de1a3fb1..74ce7381d73 100644 --- a/drivers/bus/fslmc/bus_fslmc_driver.h +++ b/drivers/bus/fslmc/bus_fslmc_driver.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -37,6 +36,8 @@ extern "C" { #endif +struct vfio_device_info; + #define FSLMC_OBJECT_MAX_LEN 32 /**< Length of each device on bus */ #define DPAA2_INVALID_MBUF_SEQN 0 diff --git a/drivers/bus/fslmc/fslmc_bus.c b/drivers/bus/fslmc/fslmc_bus.c index ebc0c1fb4fb..49c61c9d2df 100644 --- a/drivers/bus/fslmc/fslmc_bus.c +++ b/drivers/bus/fslmc/fslmc_bus.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "private.h" #include @@ -574,9 +575,6 @@ fslmc_all_device_support_iova(void) static enum rte_iova_mode rte_dpaa2_get_iommu_class(void) { - bool is_vfio_noiommu_enabled = 1; - bool has_iova_va; - if (rte_eal_iova_mode() == RTE_IOVA_PA) return RTE_IOVA_PA; @@ -584,14 +582,7 @@ rte_dpaa2_get_iommu_class(void) return RTE_IOVA_DC; /* check if all devices on the bus support Virtual addressing or not */ - has_iova_va = fslmc_all_device_support_iova(); - -#ifdef VFIO_PRESENT - is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ? - true : false; -#endif - - if (has_iova_va && !is_vfio_noiommu_enabled) + if (fslmc_all_device_support_iova() != 0 && rte_vfio_noiommu_is_enabled() == 0) return RTE_IOVA_VA; return RTE_IOVA_PA; diff --git a/drivers/bus/fslmc/fslmc_vfio.c b/drivers/bus/fslmc/fslmc_vfio.c index 68439cbd8cf..550d4e0e8d9 100644 --- a/drivers/bus/fslmc/fslmc_vfio.c +++ b/drivers/bus/fslmc/fslmc_vfio.c @@ -5,6 +5,8 @@ * */ +#include + #include #include #include @@ -32,6 +34,7 @@ #include #include #include +#include #include #include "private.h" @@ -190,7 +193,7 @@ fslmc_vfio_add_group(int vfio_group_fd, group->groupid = iommu_group_num; rte_strscpy(group->group_name, group_name, sizeof(group->group_name)); if (rte_vfio_noiommu_is_enabled() > 0) - group->iommu_type = RTE_VFIO_NOIOMMU; + group->iommu_type = VFIO_NOIOMMU_IOMMU; else group->iommu_type = VFIO_TYPE1_IOMMU; LIST_INSERT_HEAD(&s_vfio_container.groups, group, next); @@ -396,8 +399,7 @@ fslmc_vfio_open_group_fd(const char *group_name) /* if primary, try to open the group */ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { /* try regular group format */ - snprintf(filename, sizeof(filename), - VFIO_GROUP_FMT, iommu_group_num); + snprintf(filename, sizeof(filename), RTE_VFIO_GROUP_FMT, iommu_group_num); vfio_group_fd = open(filename, O_RDWR); goto add_vfio_group; @@ -450,8 +452,8 @@ fslmc_vfio_check_extensions(int vfio_container_fd) { int ret; uint32_t idx, n_extensions = 0; - static const int type_id[] = {RTE_VFIO_TYPE1, RTE_VFIO_SPAPR, - RTE_VFIO_NOIOMMU}; + static const int type_id[] = {VFIO_TYPE1_IOMMU, VFIO_SPAPR_TCE_v2_IOMMU, + VFIO_NOIOMMU_IOMMU}; static const char * const type_id_nm[] = {"Type 1", "sPAPR", "No-IOMMU"}; @@ -495,10 +497,10 @@ fslmc_vfio_open_container_fd(void) /* if we're in a primary process, try to open the container */ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR); + vfio_container_fd = open(RTE_VFIO_CONTAINER_PATH, O_RDWR); if (vfio_container_fd < 0) { DPAA2_BUS_ERR("Open VFIO container(%s), err(%d)", - VFIO_CONTAINER_PATH, vfio_container_fd); + RTE_VFIO_CONTAINER_PATH, vfio_container_fd); 
ret = vfio_container_fd; goto err_exit; } @@ -851,7 +853,7 @@ fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len) return fd; return -EIO; } - if (fslmc_vfio_iommu_type(fd) == RTE_VFIO_NOIOMMU) { + if (fslmc_vfio_iommu_type(fd) == VFIO_NOIOMMU_IOMMU) { DPAA2_BUS_DEBUG("Running in NOIOMMU mode"); if (phy != iovaddr) { DPAA2_BUS_ERR("IOVA should support with IOMMU"); @@ -951,7 +953,7 @@ fslmc_unmap_dma(uint64_t vaddr, uint64_t iovaddr, size_t len) return fd; return -EIO; } - if (fslmc_vfio_iommu_type(fd) == RTE_VFIO_NOIOMMU) { + if (fslmc_vfio_iommu_type(fd) == VFIO_NOIOMMU_IOMMU) { DPAA2_BUS_DEBUG("Running in NOIOMMU mode"); return 0; } diff --git a/drivers/bus/fslmc/fslmc_vfio.h b/drivers/bus/fslmc/fslmc_vfio.h index 815970ec38b..c995fd67b84 100644 --- a/drivers/bus/fslmc/fslmc_vfio.h +++ b/drivers/bus/fslmc/fslmc_vfio.h @@ -9,7 +9,6 @@ #define _FSLMC_VFIO_H_ #include -#include /* Pathname of FSL-MC devices directory. */ #define SYSFS_FSL_MC_DEVICES "/sys/bus/fsl-mc/devices" diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c index e32471d8b58..2a9e5196683 100644 --- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c +++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c @@ -4,6 +4,8 @@ * Copyright 2016-2022 NXP * */ +#include + #include #include #include diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index c20d159218e..66d7e09a6e2 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -64,10 +64,8 @@ rte_pci_map_device(struct rte_pci_device *dev) /* try mapping the NIC resources using VFIO if it exists */ switch (dev->kdrv) { case RTE_PCI_KDRV_VFIO: -#ifdef VFIO_PRESENT if (pci_vfio_is_enabled()) ret = pci_vfio_map_resource(dev); -#endif break; case RTE_PCI_KDRV_IGB_UIO: case RTE_PCI_KDRV_UIO_GENERIC: @@ -93,10 +91,8 @@ rte_pci_unmap_device(struct rte_pci_device *dev) /* try unmapping the NIC resources using VFIO if it exists */ switch (dev->kdrv) { case RTE_PCI_KDRV_VFIO: -#ifdef VFIO_PRESENT if (pci_vfio_is_enabled()) pci_vfio_unmap_resource(dev); -#endif break; case RTE_PCI_KDRV_IGB_UIO: case RTE_PCI_KDRV_UIO_GENERIC: @@ -599,7 +595,6 @@ pci_device_iova_mode(const struct rte_pci_driver *pdrv, switch (pdev->kdrv) { case RTE_PCI_KDRV_VFIO: { -#ifdef VFIO_PRESENT static int is_vfio_noiommu_enabled = -1; if (is_vfio_noiommu_enabled == -1) { @@ -612,7 +607,6 @@ pci_device_iova_mode(const struct rte_pci_driver *pdrv, iova_mode = RTE_IOVA_PA; else if ((pdrv->drv_flags & RTE_PCI_DRV_NEED_IOVA_AS_VA) != 0) iova_mode = RTE_IOVA_VA; -#endif break; } @@ -641,10 +635,8 @@ int rte_pci_read_config(const struct rte_pci_device *device, case RTE_PCI_KDRV_IGB_UIO: case RTE_PCI_KDRV_UIO_GENERIC: return pci_uio_read_config(intr_handle, buf, len, offset); -#ifdef VFIO_PRESENT case RTE_PCI_KDRV_VFIO: return pci_vfio_read_config(device, buf, len, offset); -#endif default: rte_pci_device_name(&device->addr, devname, RTE_DEV_NAME_MAX_LEN); @@ -665,10 +657,8 @@ int rte_pci_write_config(const struct rte_pci_device *device, case RTE_PCI_KDRV_IGB_UIO: case RTE_PCI_KDRV_UIO_GENERIC: return pci_uio_write_config(intr_handle, buf, len, offset); -#ifdef VFIO_PRESENT case RTE_PCI_KDRV_VFIO: return pci_vfio_write_config(device, buf, len, offset); -#endif default: rte_pci_device_name(&device->addr, devname, RTE_DEV_NAME_MAX_LEN); @@ -688,10 +678,8 @@ int rte_pci_mmio_read(const struct rte_pci_device *device, int bar, case RTE_PCI_KDRV_IGB_UIO: case RTE_PCI_KDRV_UIO_GENERIC: return pci_uio_mmio_read(device, bar, buf, len, offset); -#ifdef 
VFIO_PRESENT case RTE_PCI_KDRV_VFIO: return pci_vfio_mmio_read(device, bar, buf, len, offset); -#endif default: rte_pci_device_name(&device->addr, devname, RTE_DEV_NAME_MAX_LEN); @@ -711,10 +699,8 @@ int rte_pci_mmio_write(const struct rte_pci_device *device, int bar, case RTE_PCI_KDRV_IGB_UIO: case RTE_PCI_KDRV_UIO_GENERIC: return pci_uio_mmio_write(device, bar, buf, len, offset); -#ifdef VFIO_PRESENT case RTE_PCI_KDRV_VFIO: return pci_vfio_mmio_write(device, bar, buf, len, offset); -#endif default: rte_pci_device_name(&device->addr, devname, RTE_DEV_NAME_MAX_LEN); @@ -731,12 +717,10 @@ rte_pci_ioport_map(struct rte_pci_device *dev, int bar, int ret = -1; switch (dev->kdrv) { -#ifdef VFIO_PRESENT case RTE_PCI_KDRV_VFIO: if (pci_vfio_is_enabled()) ret = pci_vfio_ioport_map(dev, bar, p); break; -#endif case RTE_PCI_KDRV_IGB_UIO: case RTE_PCI_KDRV_UIO_GENERIC: ret = pci_uio_ioport_map(dev, bar, p); @@ -757,11 +741,9 @@ rte_pci_ioport_read(struct rte_pci_ioport *p, void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT case RTE_PCI_KDRV_VFIO: pci_vfio_ioport_read(p, data, len, offset); break; -#endif case RTE_PCI_KDRV_IGB_UIO: case RTE_PCI_KDRV_UIO_GENERIC: pci_uio_ioport_read(p, data, len, offset); @@ -777,11 +759,9 @@ rte_pci_ioport_write(struct rte_pci_ioport *p, const void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT case RTE_PCI_KDRV_VFIO: pci_vfio_ioport_write(p, data, len, offset); break; -#endif case RTE_PCI_KDRV_IGB_UIO: case RTE_PCI_KDRV_UIO_GENERIC: pci_uio_ioport_write(p, data, len, offset); @@ -798,12 +778,10 @@ rte_pci_ioport_unmap(struct rte_pci_ioport *p) int ret = -1; switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT case RTE_PCI_KDRV_VFIO: if (pci_vfio_is_enabled()) ret = pci_vfio_ioport_unmap(p); break; -#endif case RTE_PCI_KDRV_IGB_UIO: case RTE_PCI_KDRV_UIO_GENERIC: ret = pci_uio_ioport_unmap(p); diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h index a4d37c0d0a8..6949dd57d95 100644 --- a/drivers/bus/pci/linux/pci_init.h +++ b/drivers/bus/pci/linux/pci_init.h @@ -5,8 +5,6 @@ #ifndef EAL_PCI_INIT_H_ #define EAL_PCI_INIT_H_ -#include - #include "private.h" /** IO resource type: */ @@ -50,8 +48,6 @@ void pci_uio_ioport_write(struct rte_pci_ioport *p, const void *data, size_t len, off_t offset); int pci_uio_ioport_unmap(struct rte_pci_ioport *p); -#ifdef VFIO_PRESENT - /* access config space */ int pci_vfio_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offs); @@ -77,6 +73,4 @@ int pci_vfio_unmap_resource(struct rte_pci_device *dev); int pci_vfio_is_enabled(void); -#endif - #endif /* EAL_PCI_INIT_H_ */ diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index 5317170231b..c63721dd616 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -2,6 +2,8 @@ * Copyright(c) 2010-2014 Intel Corporation */ +#include + #include #include #include @@ -27,17 +29,6 @@ #include "pci_init.h" #include "private.h" -/** - * @file - * PCI probing using Linux VFIO. - * - * This code tries to determine if the PCI device is bound to VFIO driver, - * and initialize it (map BARs, set up interrupts) if that's the case. - * - */ - -#ifdef VFIO_PRESENT - static struct rte_tailq_elem rte_vfio_tailq = { .name = "VFIO_RESOURCE_LIST", }; @@ -284,7 +275,6 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) return -1; } -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE /* * Spinlock for device hot-unplug failure handling. 
* If it tries to access bus or device, such as handle sigbus on bus @@ -401,7 +391,6 @@ pci_vfio_disable_notifier(struct rte_pci_device *dev) return 0; } -#endif static int pci_vfio_is_ioport_bar(const struct rte_pci_device *dev, int vfio_dev_fd, @@ -676,12 +665,12 @@ pci_vfio_info_cap(struct vfio_region_info *info, int cap) struct vfio_info_cap_header *h; size_t offset; - if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) { + if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) == 0) { /* VFIO info does not advertise capabilities */ return NULL; } - offset = VFIO_CAP_OFFSET(info); + offset = info->cap_offset; while (offset != 0) { h = RTE_PTR_ADD(info, offset); if (h->id == cap) @@ -701,7 +690,7 @@ pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region) if (ret < 0) return -1; - ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL; + ret = pci_vfio_info_cap(info, VFIO_REGION_INFO_CAP_MSIX_MAPPABLE) != NULL; /* cleanup */ free(info); @@ -756,10 +745,8 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) if (rte_intr_fd_set(dev->intr_handle, -1)) return -1; -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE if (rte_intr_fd_set(dev->vfio_req_intr_handle, -1)) return -1; -#endif /* store PCI address string */ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, @@ -915,13 +902,11 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) goto err_map; } -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) { PCI_LOG(ERR, "Error setting up notifier!"); goto err_map; } -#endif TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); return 0; @@ -956,10 +941,8 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) if (rte_intr_fd_set(dev->intr_handle, -1)) return -1; -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE if (rte_intr_fd_set(dev->vfio_req_intr_handle, -1)) return -1; -#endif /* store PCI address string */ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, @@ -1013,10 +996,8 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) /* we need save vfio_dev_fd, so it can be used during release */ if (rte_intr_dev_fd_set(dev->intr_handle, vfio_dev_fd)) goto err_vfio_dev_fd; -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE if (rte_intr_dev_fd_set(dev->vfio_req_intr_handle, vfio_dev_fd)) goto err_vfio_dev_fd; -#endif return 0; err_vfio_dev_fd: @@ -1096,14 +1077,12 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, loc->domain, loc->bus, loc->devid, loc->function); -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE ret = pci_vfio_disable_notifier(dev); if (ret) { PCI_LOG(ERR, "fail to disable req notifier."); return -1; } -#endif if (rte_intr_fd_get(dev->intr_handle) < 0) return -1; @@ -1237,6 +1216,7 @@ pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, return 0; } +#define PCI_VFIO_GET_REGION_IDX(x) (x >> 40) void pci_vfio_ioport_read(struct rte_pci_ioport *p, void *data, size_t len, off_t offset) @@ -1250,7 +1230,7 @@ pci_vfio_ioport_read(struct rte_pci_ioport *p, if (pread(vfio_dev_fd, data, len, p->base + offset) <= 0) PCI_LOG(ERR, "Can't read from PCI bar (%" PRIu64 ") : offset (%x)", - VFIO_GET_REGION_IDX(p->base), (int)offset); + PCI_VFIO_GET_REGION_IDX(p->base), (int)offset); } void @@ -1266,7 +1246,7 @@ pci_vfio_ioport_write(struct rte_pci_ioport *p, if (pwrite(vfio_dev_fd, data, len, p->base + offset) <= 0) PCI_LOG(ERR, "Can't write to PCI bar (%" PRIu64 ") : offset (%x)", - VFIO_GET_REGION_IDX(p->base), (int)offset); + PCI_VFIO_GET_REGION_IDX(p->base), (int)offset); } int @@ -1327,4 +1307,3 @@ 
pci_vfio_is_enabled(void) } return status; } -#endif diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c index c88634f790b..bf5df3d94e4 100644 --- a/drivers/bus/pci/pci_common.c +++ b/drivers/bus/pci/pci_common.c @@ -603,7 +603,6 @@ pci_hot_unplug_handler(struct rte_device *dev) return -1; switch (pdev->kdrv) { -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE case RTE_PCI_KDRV_VFIO: /* * vfio kernel module guaranty the pci device would not be @@ -614,7 +613,6 @@ pci_hot_unplug_handler(struct rte_device *dev) rte_dev_event_callback_process(dev->name, RTE_DEV_EVENT_REMOVE); break; -#endif case RTE_PCI_KDRV_IGB_UIO: case RTE_PCI_KDRV_UIO_GENERIC: case RTE_PCI_KDRV_NIC_UIO: diff --git a/drivers/bus/platform/bus_platform_driver.h b/drivers/bus/platform/bus_platform_driver.h index ef4b27c0c0b..76403043c13 100644 --- a/drivers/bus/platform/bus_platform_driver.h +++ b/drivers/bus/platform/bus_platform_driver.h @@ -15,9 +15,9 @@ #include #include +#include #include #include -#include #ifdef __cplusplus extern "C" { @@ -144,8 +144,6 @@ RTE_PMD_EXPORT_NAME(nm) #define RTE_PMD_REGISTER_ALIAS(nm, alias) \ static const char *pdrvinit_ ## nm ## _alias = RTE_STR(alias) -#ifdef VFIO_PRESENT - /** * Register a platform device driver. * @@ -170,22 +168,6 @@ void rte_platform_register(struct rte_platform_driver *pdrv); __rte_internal void rte_platform_unregister(struct rte_platform_driver *pdrv); -#else - -__rte_internal -static inline void -rte_platform_register(struct rte_platform_driver *pdrv __rte_unused) -{ -} - -__rte_internal -static inline void -rte_platform_unregister(struct rte_platform_driver *pdrv __rte_unused) -{ -} - -#endif /* VFIO_PRESENT */ - #ifdef __cplusplus } #endif diff --git a/drivers/bus/platform/platform.c b/drivers/bus/platform/platform.c index 0f50027236b..f6673cf1813 100644 --- a/drivers/bus/platform/platform.c +++ b/drivers/bus/platform/platform.c @@ -2,6 +2,8 @@ * Copyright(C) 2023 Marvell. */ +#include + #include #include #include @@ -25,8 +27,6 @@ #include "private.h" -#ifdef VFIO_PRESENT - #define PLATFORM_BUS_DEVICES_PATH "/sys/bus/platform/devices" RTE_EXPORT_INTERNAL_SYMBOL(rte_platform_register) @@ -647,5 +647,3 @@ struct rte_platform_bus platform_bus = { RTE_REGISTER_BUS(platform, platform_bus.bus); RTE_LOG_REGISTER_DEFAULT(platform_bus_logtype, NOTICE); - -#endif /* VFIO_PRESENT */ diff --git a/drivers/bus/platform/platform_params.c b/drivers/bus/platform/platform_params.c index 8a6214b97cd..65b20d121f6 100644 --- a/drivers/bus/platform/platform_params.c +++ b/drivers/bus/platform/platform_params.c @@ -10,13 +10,10 @@ #include #include #include -#include #include "bus_platform_driver.h" #include "private.h" -#ifdef VFIO_PRESENT - enum platform_params { RTE_PLATFORM_PARAM_NAME, }; @@ -73,5 +70,3 @@ platform_bus_dev_iterate(const void *start, const char *str, return dev; } - -#endif /* VFIO_PRESENT */ diff --git a/drivers/bus/vmbus/rte_vmbus_reg.h b/drivers/bus/vmbus/rte_vmbus_reg.h index fb7e3043ec6..6370a07f95d 100644 --- a/drivers/bus/vmbus/rte_vmbus_reg.h +++ b/drivers/bus/vmbus/rte_vmbus_reg.h @@ -6,6 +6,12 @@ #ifndef _VMBUS_REG_H_ #define _VMBUS_REG_H_ +#include + +#include +#include +#include + /* * Hyper-V SynIC message format. */ diff --git a/drivers/common/cnxk/roc_platform.c b/drivers/common/cnxk/roc_platform.c index 88f229163a2..e13cb42285f 100644 --- a/drivers/common/cnxk/roc_platform.c +++ b/drivers/common/cnxk/roc_platform.c @@ -2,6 +2,8 @@ * Copyright(C) 2021 Marvell. 
*/ +#include + #include #include @@ -11,7 +13,6 @@ #if defined(__linux__) #include -#include #include #include #include diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build index 5548a561999..3fe7db7ec47 100644 --- a/drivers/common/mlx5/linux/meson.build +++ b/drivers/common/mlx5/linux/meson.build @@ -23,7 +23,7 @@ foreach libname:libnames if not lib.found() and not static_ibverbs lib = cc.find_library(libname, required:false) endif - if lib.found() + if lib.found() and cc.links(min_c_code, dependencies: lib) libs += lib if not static_ibverbs and not dlopen_ibverbs ext_deps += lib @@ -47,7 +47,7 @@ endif libmtcr_ul_found = false lib = cc.find_library('mtcr_ul', required:false) -if lib.found() +if lib.found() and cc.links(min_c_code, dependencies: lib) libmtcr_ul_found = true ext_deps += lib endif diff --git a/drivers/common/mlx5/windows/meson.build b/drivers/common/mlx5/windows/meson.build index c3bd3aac26f..f454788a9f0 100644 --- a/drivers/common/mlx5/windows/meson.build +++ b/drivers/common/mlx5/windows/meson.build @@ -8,7 +8,7 @@ if not cc.has_header('mlx5devx.h') endif devxlib = cc.find_library('mlx5devx', required: false) -if not devxlib.found() +if not devxlib.found() or not cc.links(min_c_code, dependencies: devxlib) build = false reason = 'missing dependency, "mlx5devx"' subdir_done() diff --git a/drivers/common/qat/dev/qat_dev_gen_lce.c b/drivers/common/qat/dev/qat_dev_gen_lce.c index 6514321c32b..9173b6c681b 100644 --- a/drivers/common/qat/dev/qat_dev_gen_lce.c +++ b/drivers/common/qat/dev/qat_dev_gen_lce.c @@ -3,7 +3,6 @@ */ #include -#include #include "qat_device.h" #include "qat_qp.h" @@ -17,9 +16,6 @@ #define BITS_PER_ULONG (sizeof(unsigned long) * 8) -#define VFIO_PCI_LCE_DEVICE_CFG_REGION_INDEX VFIO_PCI_NUM_REGIONS -#define VFIO_PCI_LCE_CY_CFG_REGION_INDEX (VFIO_PCI_NUM_REGIONS + 2) -#define VFIO_PCI_LCE_RING_CFG_REGION_INDEX (VFIO_PCI_NUM_REGIONS + 4) #define LCE_DEVICE_NAME_SIZE 64 #define LCE_DEVICE_MAX_BANKS 2080 #define LCE_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) diff --git a/drivers/compress/zlib/zlib_pmd.c b/drivers/compress/zlib/zlib_pmd.c index 92e808e78c7..26673851fab 100644 --- a/drivers/compress/zlib/zlib_pmd.c +++ b/drivers/compress/zlib/zlib_pmd.c @@ -4,6 +4,7 @@ #include #include +#include #include "zlib_pmd_private.h" @@ -15,6 +16,115 @@ (data = rte_pktmbuf_mtod(mbuf, uint8_t *)), \ (len = rte_pktmbuf_data_len(mbuf)) : 0) +#define BOTTOM_NIBBLE_OF_BYTE 0xf +#define TOP_NIBBLE_OF_BYTE 0xf0 +#define BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD 0x0f0f0f0f +#define TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD 0xf0f0f0f0 + +static void +process_zlib_deflate_chksum(struct rte_comp_op *op, + z_stream *strm, enum rte_comp_checksum_type chksum) +{ + uint32_t dictionary_len = 0; + uint8_t *dictionary = malloc(DEFLATE_MAX_WINDOW_SIZE); + op->status = RTE_COMP_OP_STATUS_SUCCESS; + + switch (chksum) { + case RTE_COMP_CHECKSUM_3GPP_PDCP_UDC: + + if (!dictionary) { + ZLIB_PMD_ERR("Unable to fetch dictionary"); + op->status = RTE_COMP_OP_STATUS_ERROR; + return; + } + + if (deflateGetDictionary(strm, dictionary, &dictionary_len)) { + ZLIB_PMD_ERR("Unable to fetch dictionary"); + op->status = RTE_COMP_OP_STATUS_CHECKSUM_VALIDATION_FAILED; + free(dictionary); + return; + } + + uint32_t dictionary_start = (uint32_t)(*dictionary); + uint32_t dictionary_end = (uint32_t)(*(dictionary + dictionary_len - 4)); + uint32_t sum = (dictionary_start & BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD) + + (dictionary_start & (TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD >> 4)) + + 
(dictionary_end & BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD) + + (dictionary_end & (TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD >> 4)); + uint8_t *sum_bytes = (uint8_t *)&sum; + + op->output_chksum = ~(sum_bytes[0] + sum_bytes[1] + sum_bytes[2] + sum_bytes[3]) + & BOTTOM_NIBBLE_OF_BYTE; + break; + case RTE_COMP_CHECKSUM_NONE: + break; + case RTE_COMP_CHECKSUM_CRC32: + case RTE_COMP_CHECKSUM_ADLER32: + case RTE_COMP_CHECKSUM_CRC32_ADLER32: + default: + ZLIB_PMD_ERR("Checksum not supported"); + op->status = RTE_COMP_OP_STATUS_INVALID_ARGS; + free(dictionary); + return; + } + free(dictionary); +} + +static void +process_zlib_inflate_chksum(struct rte_comp_op *op, + z_stream *strm, + enum rte_comp_checksum_type chksum) +{ + uint32_t dictionary_len = 0; + uint8_t *dictionary = malloc(DEFLATE_MAX_WINDOW_SIZE); + op->status = RTE_COMP_OP_STATUS_SUCCESS; + + switch (chksum) { + case RTE_COMP_CHECKSUM_3GPP_PDCP_UDC: + if (!dictionary) { + ZLIB_PMD_ERR("Unable to malloc dictionary"); + op->status = RTE_COMP_OP_STATUS_ERROR; + return; + } + + if (inflateGetDictionary(strm, dictionary, &dictionary_len)) { + ZLIB_PMD_ERR("Unable to fetch dictionary"); + op->status = RTE_COMP_OP_STATUS_CHECKSUM_VALIDATION_FAILED; + free(dictionary); + return; + } + + uint32_t dictionary_start = (uint32_t)(*dictionary); + uint32_t dictionary_end = (uint32_t)(*(dictionary + dictionary_len - 4)); + uint32_t sum = (dictionary_start & BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD) + + (dictionary_start & (TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD >> 4)) + + (dictionary_end & BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD) + + (dictionary_end & (TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD >> 4)); + uint8_t *sum_bytes = (uint8_t *)&sum; + + op->output_chksum = ~(sum_bytes[0] + sum_bytes[1] + sum_bytes[2] + sum_bytes[3]) + & BOTTOM_NIBBLE_OF_BYTE; + + if (op->input_chksum != op->output_chksum) { + ZLIB_PMD_ERR("Checksum does not match"); + op->status = RTE_COMP_OP_STATUS_CHECKSUM_VALIDATION_FAILED; + free(dictionary); + return; + } + break; + case RTE_COMP_CHECKSUM_NONE: + break; + case RTE_COMP_CHECKSUM_CRC32: + case RTE_COMP_CHECKSUM_ADLER32: + case RTE_COMP_CHECKSUM_CRC32_ADLER32: + default: + ZLIB_PMD_ERR("Checksum not supported"); + op->status = RTE_COMP_OP_STATUS_INVALID_ARGS; + free(dictionary); + return; + } +} + static void process_zlib_deflate(struct rte_comp_op *op, z_stream *strm) { @@ -27,6 +137,9 @@ process_zlib_deflate(struct rte_comp_op *op, z_stream *strm) case RTE_COMP_FLUSH_FINAL: fin_flush = Z_FINISH; break; + case RTE_COMP_FLUSH_SYNC: + fin_flush = Z_SYNC_FLUSH; + break; default: op->status = RTE_COMP_OP_STATUS_INVALID_ARGS; ZLIB_PMD_ERR("Invalid flush value"); @@ -49,6 +162,9 @@ process_zlib_deflate(struct rte_comp_op *op, z_stream *strm) strm->avail_out = rte_pktmbuf_data_len(mbuf_dst) - op->dst.offset; + uLong total_in_at_start = strm->total_in; + uLong total_out_at_start = strm->total_out; + /* Set flush value to NO_FLUSH unless it is last mbuf */ flush = Z_NO_FLUSH; /* Initialize status to SUCCESS */ @@ -56,8 +172,8 @@ process_zlib_deflate(struct rte_comp_op *op, z_stream *strm) do { /* Set flush value to Z_FINISH for last block */ - if ((op->src.length - strm->total_in) <= strm->avail_in) { - strm->avail_in = (op->src.length - strm->total_in); + if ((op->src.length - (strm->total_in - total_in_at_start)) <= strm->avail_in) { + strm->avail_in = (op->src.length - (strm->total_in - total_in_at_start)); flush = fin_flush; } do { @@ -92,17 +208,18 @@ process_zlib_deflate(struct rte_comp_op *op, z_stream *strm) /* Update op stats */ switch (op->status) { case 
RTE_COMP_OP_STATUS_SUCCESS: - op->consumed += strm->total_in; + op->consumed += strm->total_in - total_in_at_start; /* Fall-through */ case RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED: - op->produced += strm->total_out; + op->produced += strm->total_out - total_out_at_start; break; default: ZLIB_PMD_ERR("stats not updated for status:%d", op->status); } - deflateReset(strm); + if (op->flush_flag != RTE_COMP_FLUSH_SYNC) + deflateReset(strm); } static void @@ -127,6 +244,9 @@ process_zlib_inflate(struct rte_comp_op *op, z_stream *strm) strm->avail_out = rte_pktmbuf_data_len(mbuf_dst) - op->dst.offset; + uLong total_in_at_start = strm->total_in; + uLong total_out_at_start = strm->total_out; + /** Ignoring flush value provided from application for decompression */ flush = Z_NO_FLUSH; /* initialize status to SUCCESS */ @@ -178,17 +298,18 @@ process_zlib_inflate(struct rte_comp_op *op, z_stream *strm) /* Update op stats */ switch (op->status) { case RTE_COMP_OP_STATUS_SUCCESS: - op->consumed += strm->total_in; + op->consumed += strm->total_in - total_in_at_start; /* Fall-through */ case RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED: - op->produced += strm->total_out; + op->produced += strm->total_out - total_out_at_start; break; default: ZLIB_PMD_ERR("stats not produced for status:%d", op->status); } - inflateReset(strm); + if (op->flush_flag != RTE_COMP_FLUSH_SYNC) + inflateReset(strm); } /** Process comp operation for mbuf */ @@ -203,10 +324,14 @@ process_zlib_op(struct zlib_qp *qp, struct rte_comp_op *op) (op->dst.offset > rte_pktmbuf_data_len(op->m_dst))) { op->status = RTE_COMP_OP_STATUS_INVALID_ARGS; ZLIB_PMD_ERR("Invalid source or destination buffers or " - "invalid Operation requested"); + "invalid Operation requested"); } else { private_xform = (struct zlib_priv_xform *)op->private_xform; stream = &private_xform->stream; + stream->chksum(op, &stream->strm, stream->chksum_type); + if (op->status != RTE_COMP_OP_STATUS_SUCCESS) + return -1; + stream->comp(op, &stream->strm); } /* whatever is out of op, put it into completion queue with @@ -232,6 +357,7 @@ zlib_set_stream_parameters(const struct rte_comp_xform *xform, case RTE_COMP_COMPRESS: stream->comp = process_zlib_deflate; stream->free = deflateEnd; + stream->chksum = process_zlib_deflate_chksum; /** Compression window bits */ switch (xform->compress.algo) { case RTE_COMP_ALGO_DEFLATE: @@ -281,17 +407,30 @@ zlib_set_stream_parameters(const struct rte_comp_xform *xform, ZLIB_PMD_ERR("Compression strategy not supported"); return -1; } + + /** Checksum used */ + stream->chksum_type = xform->compress.chksum; + if (deflateInit2(strm, level, Z_DEFLATED, wbits, DEF_MEM_LEVEL, strategy) != Z_OK) { ZLIB_PMD_ERR("Deflate init failed"); return -1; } + + if (xform->compress.deflate.dictionary) { + if (deflateSetDictionary(strm, xform->compress.deflate.dictionary, + xform->compress.deflate.dictionary_len)) { + ZLIB_PMD_ERR("Deflate set dictionary failed"); + return -1; + } + } break; case RTE_COMP_DECOMPRESS: stream->comp = process_zlib_inflate; stream->free = inflateEnd; + stream->chksum = process_zlib_inflate_chksum; /** window bits */ switch (xform->decompress.algo) { case RTE_COMP_ALGO_DEFLATE: @@ -302,10 +441,21 @@ zlib_set_stream_parameters(const struct rte_comp_xform *xform, return -1; } + /** Checksum used */ + stream->chksum_type = xform->decompress.chksum; + if (inflateInit2(strm, wbits) != Z_OK) { ZLIB_PMD_ERR("Inflate init failed"); return -1; } + + if (xform->decompress.inflate.dictionary) { + if (inflateSetDictionary(strm, 
xform->decompress.inflate.dictionary, + xform->decompress.inflate.dictionary_len)) { + ZLIB_PMD_ERR("inflate set dictionary failed"); + return -1; + } + } break; default: return -1; diff --git a/drivers/compress/zlib/zlib_pmd_private.h b/drivers/compress/zlib/zlib_pmd_private.h index fd8c4c55a4d..f6bd948cc29 100644 --- a/drivers/compress/zlib/zlib_pmd_private.h +++ b/drivers/compress/zlib/zlib_pmd_private.h @@ -46,6 +46,9 @@ typedef void (*comp_func_t)(struct rte_comp_op *op, z_stream *strm); typedef int (*comp_free_t)(z_stream *strm); +typedef void (*chksum_func_t) + (struct rte_comp_op *op, z_stream *strm, enum rte_comp_checksum_type chksum); + /** ZLIB Stream structure */ struct __rte_cache_aligned zlib_stream { z_stream strm; @@ -54,6 +57,10 @@ struct __rte_cache_aligned zlib_stream { /**< Operation (compression/decompression) */ comp_free_t free; /**< Free Operation (compression/decompression) */ + chksum_func_t chksum; + /**< Checksum Operation (compression/decompression) */ + enum rte_comp_checksum_type chksum_type; + /**< Type of checksum to generate on the uncompressed data */ }; /** ZLIB private xform structure */ diff --git a/drivers/crypto/bcmfs/bcmfs_vfio.c b/drivers/crypto/bcmfs/bcmfs_vfio.c index dc2def580f2..e7f7ed994c3 100644 --- a/drivers/crypto/bcmfs/bcmfs_vfio.c +++ b/drivers/crypto/bcmfs/bcmfs_vfio.c @@ -3,6 +3,8 @@ * All rights reserved. */ +#include + #include #include #include @@ -13,7 +15,6 @@ #include "bcmfs_logs.h" #include "bcmfs_vfio.h" -#ifdef VFIO_PRESENT static int vfio_map_dev_obj(const char *path, const char *dev_obj, uint32_t *size, void **addr, int *dev_fd) @@ -93,15 +94,3 @@ bcmfs_release_vfio(struct bcmfs_device *dev) return; } } -#else -int -bcmfs_attach_vfio(struct bcmfs_device *dev __rte_unused) -{ - return -1; -} - -void -bcmfs_release_vfio(struct bcmfs_device *dev __rte_unused) -{ -} -#endif diff --git a/drivers/crypto/bcmfs/meson.build b/drivers/crypto/bcmfs/meson.build index 5842f83a3bb..925dde2ee2b 100644 --- a/drivers/crypto/bcmfs/meson.build +++ b/drivers/crypto/bcmfs/meson.build @@ -3,9 +3,9 @@ # All rights reserved. 
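
/*
 * A standalone sketch of the UDC dictionary checksum computed above. The
 * three mask macros are defined elsewhere in the PMD; the values below are
 * assumptions for illustration only. The arithmetic mirrors the code
 * above: mask the nibbles of the first and last dictionary words, add
 * them, then keep the low nibble of the complemented byte-wise sum.
 */
#include <stdint.h>

#define BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD	0x0f0f0f0fUL	/* assumed */
#define TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD	0xf0f0f0f0UL	/* assumed */
#define BOTTOM_NIBBLE_OF_BYTE			0x0fUL		/* assumed */

static uint8_t
udc_dictionary_chksum(uint32_t dictionary_start, uint32_t dictionary_end)
{
	uint32_t sum = (dictionary_start & BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD) +
			(dictionary_start & (TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD >> 4)) +
			(dictionary_end & BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD) +
			(dictionary_end & (TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD >> 4));
	uint8_t *sum_bytes = (uint8_t *)&sum;

	/* Byte order is irrelevant here: the byte-wise sum is commutative. */
	return ~(sum_bytes[0] + sum_bytes[1] + sum_bytes[2] + sum_bytes[3]) &
			BOTTOM_NIBBLE_OF_BYTE;
}
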
# -if is_windows +if not is_linux build = false - reason = 'not supported on Windows' + reason = 'only supported on Linux' subdir_done() endif diff --git a/drivers/crypto/cnxk/rte_pmd_cnxk_crypto.h b/drivers/crypto/cnxk/rte_pmd_cnxk_crypto.h index 46861ab2cf9..70c019e94cc 100644 --- a/drivers/crypto/cnxk/rte_pmd_cnxk_crypto.h +++ b/drivers/crypto/cnxk/rte_pmd_cnxk_crypto.h @@ -11,8 +11,10 @@ #ifndef _PMD_CNXK_CRYPTO_H_ #define _PMD_CNXK_CRYPTO_H_ +#include #include +#include #include #include diff --git a/drivers/meson.build b/drivers/meson.build index f25f4255656..424acb03a7f 100644 --- a/drivers/meson.build +++ b/drivers/meson.build @@ -241,11 +241,8 @@ foreach subpath:subdirs dpdk_extra_ldflags += pkgconfig_extra_libs - install_headers(headers) - if get_option('enable_driver_sdk') - install_headers(driver_sdk_headers) - endif - dpdk_chkinc_headers += driver_sdk_headers + dpdk_headers += headers + dpdk_drivers_headers += driver_sdk_headers if headers.length() > 0 dpdk_includes += include_directories(drv_path) diff --git a/drivers/ml/cnxk/meson.build b/drivers/ml/cnxk/meson.build index 0680a0faa5c..ef7e21d7476 100644 --- a/drivers/ml/cnxk/meson.build +++ b/drivers/ml/cnxk/meson.build @@ -72,7 +72,11 @@ ext_deps += dlpack_dep ext_deps += dmlc_dep ext_deps += tvm_dep ext_deps += tvmdp_dep -ext_deps += cc.find_library('stdc++', required: true) +stdcpp_dep = cc.find_library('stdc++', required: true) +if not cc.links(min_c_code, dependencies: stdcpp_dep) + error('broken dependency, "libstdc++"') +endif +ext_deps += stdcpp_dep deps += ['bus_vdev'] diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build index 2d37bcc8696..b35888b8693 100644 --- a/drivers/net/af_xdp/meson.build +++ b/drivers/net/af_xdp/meson.build @@ -20,6 +20,9 @@ xdp_dep = dependency('libxdp', version : libxdp_ver, required: false, method: 'p bpf_dep = dependency('libbpf', required: false, method: 'pkg-config') if not bpf_dep.found() bpf_dep = cc.find_library('bpf', required: false) + if bpf_dep.found() and not cc.links(min_c_code, dependencies: bpf_dep) + bpf_dep = disabler() + endif endif cflags += no_wvla_cflag diff --git a/drivers/net/dpaa/rte_pmd_dpaa.h b/drivers/net/dpaa/rte_pmd_dpaa.h index ec45633ba23..0a57e2097af 100644 --- a/drivers/net/dpaa/rte_pmd_dpaa.h +++ b/drivers/net/dpaa/rte_pmd_dpaa.h @@ -5,6 +5,8 @@ #ifndef _PMD_DPAA_H_ #define _PMD_DPAA_H_ +#include + /** * @file rte_pmd_dpaa.h * diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c index f9ef3dbb062..59fb7902406 100644 --- a/drivers/net/hns3/hns3_ethdev_vf.c +++ b/drivers/net/hns3/hns3_ethdev_vf.c @@ -5,7 +5,6 @@ #include #include #include -#include #include "hns3_ethdev.h" #include "hns3_common.h" diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h index 770284f7ab2..741808f5735 100644 --- a/drivers/net/intel/common/rx.h +++ b/drivers/net/intel/common/rx.h @@ -131,6 +131,7 @@ struct ci_rx_path_features_extra { bool flex_desc; bool bulk_alloc; bool disabled; + bool single_queue; }; struct ci_rx_path_features { @@ -278,6 +279,10 @@ ci_rx_path_select(struct ci_rx_path_features req_features, if (path_features->extra.flex_desc != req_features.extra.flex_desc) continue; + /* If requested, ensure the path supports single queue RX. */ + if (path_features->extra.single_queue != req_features.extra.single_queue) + continue; + /* If requested, ensure the path supports scattered RX. 
*/ if (path_features->extra.scattered != req_features.extra.scattered) continue; diff --git a/drivers/net/intel/common/rx_vec_arm.h b/drivers/net/intel/common/rx_vec_arm.h index 2e48d4b6c03..f7e7b8c3966 100644 --- a/drivers/net/intel/common/rx_vec_arm.h +++ b/drivers/net/intel/common/rx_vec_arm.h @@ -16,19 +16,19 @@ static inline int _ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq) { - struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start]; + struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start].mbuf; const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH; volatile union ci_rx_desc *rxdp; int i; rxdp = &rxq->rx_ring[rxq->rxrearm_start]; - if (rte_mempool_get_bulk(rxq->mp, (void **)rxp, rearm_thresh) < 0) { + if (rte_mbuf_raw_alloc_bulk(rxq->mp, rxp, rearm_thresh) < 0) { if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) { uint64x2_t zero = vdupq_n_u64(0); for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) { - rxp[i].mbuf = &rxq->fake_mbuf; + rxp[i] = &rxq->fake_mbuf; vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i]), zero); } } diff --git a/drivers/net/intel/common/rx_vec_ppc.h b/drivers/net/intel/common/rx_vec_ppc.h index 97affc34c2d..7d02eb067bd 100644 --- a/drivers/net/intel/common/rx_vec_ppc.h +++ b/drivers/net/intel/common/rx_vec_ppc.h @@ -16,19 +16,19 @@ static inline int _ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq) { - struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start]; + struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start].mbuf; const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH; volatile union ci_rx_desc *rxdp; int i; rxdp = &rxq->rx_ring[rxq->rxrearm_start]; - if (rte_mempool_get_bulk(rxq->mp, (void **)rxp, rearm_thresh) < 0) { + if (rte_mbuf_raw_alloc_bulk(rxq->mp, rxp, rearm_thresh) < 0) { if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) { __vector unsigned long dma_addr0 = (__vector unsigned long){}; for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) { - rxp[i].mbuf = &rxq->fake_mbuf; + rxp[i] = &rxq->fake_mbuf; vec_st(dma_addr0, 0, RTE_CAST_PTR(__vector unsigned long *, &rxdp[i])); } diff --git a/drivers/net/intel/common/rx_vec_x86.h b/drivers/net/intel/common/rx_vec_x86.h index 32dd5ce189a..8d497e7196d 100644 --- a/drivers/net/intel/common/rx_vec_x86.h +++ b/drivers/net/intel/common/rx_vec_x86.h @@ -21,19 +21,19 @@ enum ci_rx_vec_level { static inline int _ci_rxq_rearm_get_bufs(struct ci_rx_queue *rxq) { - struct ci_rx_entry *rxp = &rxq->sw_ring[rxq->rxrearm_start]; + struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start].mbuf; const uint16_t rearm_thresh = CI_VPMD_RX_REARM_THRESH; volatile union ci_rx_desc *rxdp; int i; rxdp = &rxq->rx_ring[rxq->rxrearm_start]; - if (rte_mempool_get_bulk(rxq->mp, (void **)rxp, rearm_thresh) < 0) { + if (rte_mbuf_raw_alloc_bulk(rxq->mp, rxp, rearm_thresh) < 0) { if (rxq->rxrearm_nb + rearm_thresh >= rxq->nb_rx_desc) { const __m128i zero = _mm_setzero_si128(); for (i = 0; i < CI_VPMD_DESCS_PER_LOOP; i++) { - rxp[i].mbuf = &rxq->fake_mbuf; + rxp[i] = &rxq->fake_mbuf; _mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i]), zero); } } @@ -359,7 +359,8 @@ ci_get_x86_max_simd_bitwidth(void) int simd = X86_MAX_SIMD_BITWIDTH; if (simd >= 512 && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 && - rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1) + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 && + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512DQ) == 1) ret = RTE_VECT_SIMD_512; else if (simd >= 256 && (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1)) ret = RTE_VECT_SIMD_256; diff --git a/drivers/net/intel/common/tx.h 
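
/*
 * The rearm helpers above now allocate straight into the mbuf pointer
 * array with rte_mbuf_raw_alloc_bulk(), the mbuf-aware bulk allocator this
 * series switches to; unlike a plain mempool get it can sanity-check the
 * mbufs in debug builds. A minimal sketch of the resulting rearm flow;
 * the descriptor layout is illustrative, not a specific NIC's:
 */
#include <errno.h>
#include <stdint.h>

#include <rte_mbuf.h>

static int
rearm_sketch(struct rte_mempool *mp, struct rte_mbuf **ring,
		volatile uint64_t *desc, uint16_t nb)
{
	uint16_t i;

	if (rte_mbuf_raw_alloc_bulk(mp, ring, nb) < 0)
		return -ENOMEM;	/* keep old descriptors; retry on next burst */

	for (i = 0; i < nb; i++)
		/* Raw mbufs keep a valid buf_iova; only the DMA address of
		 * the default data offset needs to be programmed. */
		desc[i] = rte_mbuf_data_iova_default(ring[i]);

	return 0;
}
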
b/drivers/net/intel/common/tx.h index 7dc84da1703..5af64a4cfee 100644 --- a/drivers/net/intel/common/tx.h +++ b/drivers/net/intel/common/tx.h @@ -84,7 +84,6 @@ struct ci_tx_queue { }; struct { /* iavf driver specific values */ uint16_t ipsec_crypto_pkt_md_offset; - uint8_t rel_mbufs_type; #define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1 BIT(0) #define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(1) uint8_t vlan_flag; @@ -203,13 +202,13 @@ ci_tx_free_bufs_vec(struct ci_tx_queue *txq, ci_desc_done_fn desc_done, bool ctx if (likely(m->pool == free[0]->pool)) { free[nb_free++] = m; } else { - rte_mempool_put_bulk(free[0]->pool, (void *)free, nb_free); + rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free); free[0] = m; nb_free = 1; } } } - rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); + rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free); } else { for (uint32_t i = 1; i < n; i++) { m = rte_pktmbuf_prefree_seg(txep[i].mbuf); diff --git a/drivers/net/intel/cpfl/cpfl_rxtx.c b/drivers/net/intel/cpfl/cpfl_rxtx.c index 02e81f7f348..453ec975d5c 100644 --- a/drivers/net/intel/cpfl/cpfl_rxtx.c +++ b/drivers/net/intel/cpfl/cpfl_rxtx.c @@ -1409,111 +1409,51 @@ cpfl_set_rx_function(struct rte_eth_dev *dev) { struct cpfl_vport *cpfl_vport = dev->data->dev_private; struct idpf_vport *vport = &cpfl_vport->base; + struct idpf_adapter *ad = vport->adapter; + struct ci_rx_path_features req_features = { + .rx_offloads = dev->data->dev_conf.rxmode.offloads, + .simd_width = RTE_VECT_SIMD_DISABLED, + }; #ifdef RTE_ARCH_X86 struct cpfl_rx_queue *cpfl_rxq; int i; if (cpfl_rx_vec_dev_check_default(dev) == CPFL_VECTOR_PATH && - rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) { - vport->rx_vec_allowed = true; + rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) + req_features.simd_width = cpfl_get_max_simd_bitwidth(); +#endif /* RTE_ARCH_X86 */ - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 && - rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) - vport->rx_use_avx2 = true; + req_features.extra.single_queue = (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE); + req_features.extra.scattered = dev->data->scattered_rx; - if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) -#ifdef CC_AVX512_SUPPORT - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 && - rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 && - rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512DQ)) - vport->rx_use_avx512 = true; -#else - PMD_DRV_LOG(NOTICE, - "AVX512 is not supported in build env"); -#endif /* CC_AVX512_SUPPORT */ - } else { - vport->rx_vec_allowed = false; - } -#endif /* RTE_ARCH_X86 */ + ad->rx_func_type = ci_rx_path_select(req_features, + &idpf_rx_path_infos[0], + IDPF_RX_MAX, + IDPF_RX_DEFAULT); #ifdef RTE_ARCH_X86 - if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { - if (vport->rx_vec_allowed) { + if (idpf_rx_path_infos[ad->rx_func_type].features.simd_width >= RTE_VECT_SIMD_256) { + /* Vector function selected. Prepare the rxq accordingly. 
*/ + if (idpf_rx_path_infos[ad->rx_func_type].features.extra.single_queue) { for (i = 0; i < dev->data->nb_rx_queues; i++) { cpfl_rxq = dev->data->rx_queues[i]; - if (cpfl_rxq->hairpin_info.hairpin_q) - continue; - (void)idpf_qc_splitq_rx_vec_setup(&cpfl_rxq->base); - } -#ifdef CC_AVX512_SUPPORT - if (vport->rx_use_avx512) { - PMD_DRV_LOG(NOTICE, - "Using Split AVX512 Vector Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_splitq_recv_pkts_avx512; - return; + (void)idpf_qc_singleq_rx_vec_setup(&cpfl_rxq->base); } -#endif /* CC_AVX512_SUPPORT */ - } - PMD_DRV_LOG(NOTICE, - "Using Split Scalar Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_splitq_recv_pkts; - } else { - if (vport->rx_vec_allowed) { + } else { for (i = 0; i < dev->data->nb_rx_queues; i++) { cpfl_rxq = dev->data->rx_queues[i]; - (void)idpf_qc_singleq_rx_vec_setup(&cpfl_rxq->base); - } -#ifdef CC_AVX512_SUPPORT - if (vport->rx_use_avx512) { - PMD_DRV_LOG(NOTICE, - "Using Single AVX512 Vector Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts_avx512; - return; - } -#endif /* CC_AVX512_SUPPORT */ - if (vport->rx_use_avx2) { - PMD_DRV_LOG(NOTICE, - "Using Single AVX2 Vector Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts_avx2; - return; + if (cpfl_rxq->hairpin_info.hairpin_q) + continue; + (void)idpf_qc_splitq_rx_vec_setup(&cpfl_rxq->base); } } - if (dev->data->scattered_rx) { - PMD_DRV_LOG(NOTICE, - "Using Single Scalar Scatterd Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_scatter_pkts; - return; - } - PMD_DRV_LOG(NOTICE, - "Using Single Scalar Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts; } -#else - if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { - PMD_DRV_LOG(NOTICE, - "Using Split Scalar Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_splitq_recv_pkts; - } else { - if (dev->data->scattered_rx) { - PMD_DRV_LOG(NOTICE, - "Using Single Scalar Scatterd Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_scatter_pkts; - return; - } - PMD_DRV_LOG(NOTICE, - "Using Single Scalar Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts; - } -#endif /* RTE_ARCH_X86 */ +#endif + + dev->rx_pkt_burst = idpf_rx_path_infos[ad->rx_func_type].pkt_burst; + PMD_DRV_LOG(NOTICE, "Using %s Rx (port %d).", + idpf_rx_path_infos[ad->rx_func_type].info, dev->data->port_id); + } void @@ -1522,6 +1462,7 @@ cpfl_set_tx_function(struct rte_eth_dev *dev) struct cpfl_vport *cpfl_vport = dev->data->dev_private; struct idpf_vport *vport = &cpfl_vport->base; #ifdef RTE_ARCH_X86 + enum rte_vect_max_simd tx_simd_width = RTE_VECT_SIMD_DISABLED; #ifdef CC_AVX512_SUPPORT struct cpfl_tx_queue *cpfl_txq; int i; @@ -1530,22 +1471,12 @@ cpfl_set_tx_function(struct rte_eth_dev *dev) if (cpfl_tx_vec_dev_check_default(dev) == CPFL_VECTOR_PATH && rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) { vport->tx_vec_allowed = true; - - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 && - rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) - vport->tx_use_avx2 = true; - - if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) + tx_simd_width = cpfl_get_max_simd_bitwidth(); #ifdef CC_AVX512_SUPPORT - { - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 && - rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1) - vport->tx_use_avx512 = true; - if (vport->tx_use_avx512) { - for (i = 
0; i < dev->data->nb_tx_queues; i++) { - cpfl_txq = dev->data->tx_queues[i]; - idpf_qc_tx_vec_avx512_setup(&cpfl_txq->base); - } + if (tx_simd_width == RTE_VECT_SIMD_512) { + for (i = 0; i < dev->data->nb_tx_queues; i++) { + cpfl_txq = dev->data->tx_queues[i]; + idpf_qc_tx_vec_avx512_setup(&cpfl_txq->base); } } #else @@ -1561,7 +1492,7 @@ cpfl_set_tx_function(struct rte_eth_dev *dev) if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { if (vport->tx_vec_allowed) { #ifdef CC_AVX512_SUPPORT - if (vport->tx_use_avx512) { + if (tx_simd_width == RTE_VECT_SIMD_512) { PMD_DRV_LOG(NOTICE, "Using Split AVX512 Vector Tx (port %d).", dev->data->port_id); @@ -1579,7 +1510,7 @@ cpfl_set_tx_function(struct rte_eth_dev *dev) } else { if (vport->tx_vec_allowed) { #ifdef CC_AVX512_SUPPORT - if (vport->tx_use_avx512) { + if (tx_simd_width == RTE_VECT_SIMD_512) { for (i = 0; i < dev->data->nb_tx_queues; i++) { cpfl_txq = dev->data->tx_queues[i]; if (cpfl_txq == NULL) @@ -1594,7 +1525,7 @@ cpfl_set_tx_function(struct rte_eth_dev *dev) return; } #endif /* CC_AVX512_SUPPORT */ - if (vport->tx_use_avx2) { + if (tx_simd_width == RTE_VECT_SIMD_256) { PMD_DRV_LOG(NOTICE, "Using Single AVX2 Vector Tx (port %d).", dev->data->port_id); diff --git a/drivers/net/intel/cpfl/cpfl_rxtx_vec_common.h b/drivers/net/intel/cpfl/cpfl_rxtx_vec_common.h index f1e555b5f83..525ca9a6e05 100644 --- a/drivers/net/intel/cpfl/cpfl_rxtx_vec_common.h +++ b/drivers/net/intel/cpfl/cpfl_rxtx_vec_common.h @@ -11,6 +11,10 @@ #include "cpfl_ethdev.h" #include "cpfl_rxtx.h" +#ifdef RTE_ARCH_X86 +#include "../common/rx_vec_x86.h" +#endif + #define CPFL_SCALAR_PATH 0 #define CPFL_VECTOR_PATH 1 #define CPFL_RX_NO_VECTOR_FLAGS ( \ @@ -121,4 +125,12 @@ cpfl_tx_vec_dev_check_default(struct rte_eth_dev *dev) return CPFL_VECTOR_PATH; } +#ifdef RTE_ARCH_X86 +static inline enum rte_vect_max_simd +cpfl_get_max_simd_bitwidth(void) +{ + return ci_get_x86_max_simd_bitwidth(); +} +#endif + #endif /*_CPFL_RXTX_VEC_COMMON_H_*/ diff --git a/drivers/net/intel/fm10k/fm10k_ethdev.c b/drivers/net/intel/fm10k/fm10k_ethdev.c index 75ce2e19cfd..57f09be88e2 100644 --- a/drivers/net/intel/fm10k/fm10k_ethdev.c +++ b/drivers/net/intel/fm10k/fm10k_ethdev.c @@ -205,15 +205,14 @@ rx_queue_reset(struct fm10k_rx_queue *q) int i, diag; PMD_INIT_FUNC_TRACE(); - diag = rte_mempool_get_bulk(q->mp, (void **)q->sw_ring, q->nb_desc); + diag = rte_mbuf_raw_alloc_bulk(q->mp, (void *)q->sw_ring, q->nb_desc); if (diag != 0) return -ENOMEM; for (i = 0; i < q->nb_desc; ++i) { fm10k_pktmbuf_reset(q->sw_ring[i], q->port_id); if (!fm10k_addr_alignment_valid(q->sw_ring[i])) { - rte_mempool_put_bulk(q->mp, (void **)q->sw_ring, - q->nb_desc); + rte_mbuf_raw_free_bulk(q->mp, q->sw_ring, q->nb_desc); return -EINVAL; } dma_addr = MBUF_DMA_ADDR_DEFAULT(q->sw_ring[i]); diff --git a/drivers/net/intel/fm10k/fm10k_rxtx.c b/drivers/net/intel/fm10k/fm10k_rxtx.c index d145b8a2cc6..abcb6672387 100644 --- a/drivers/net/intel/fm10k/fm10k_rxtx.c +++ b/drivers/net/intel/fm10k/fm10k_rxtx.c @@ -164,8 +164,8 @@ fm10k_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, q->next_dd = next_dd; if ((q->next_dd > q->next_trigger) || (alloc == 1)) { - ret = rte_mempool_get_bulk(q->mp, - (void **)&q->sw_ring[q->next_alloc], + ret = rte_mbuf_raw_alloc_bulk(q->mp, + (void *)&q->sw_ring[q->next_alloc], q->alloc_thresh); if (unlikely(ret != 0)) { @@ -322,8 +322,8 @@ fm10k_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, q->next_dd = next_dd; if ((q->next_dd > q->next_trigger) || (alloc == 1)) { - ret = 
rte_mempool_get_bulk(q->mp, - (void **)&q->sw_ring[q->next_alloc], + ret = rte_mbuf_raw_alloc_bulk(q->mp, + (void *)&q->sw_ring[q->next_alloc], q->alloc_thresh); if (unlikely(ret != 0)) { @@ -495,15 +495,14 @@ static inline void tx_free_bulk_mbuf(struct rte_mbuf **txep, int num) if (likely(m->pool == free[0]->pool)) free[nb_free++] = m; else { - rte_mempool_put_bulk(free[0]->pool, - (void *)free, nb_free); + rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free); free[0] = m; nb_free = 1; } } txep[i] = NULL; } - rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); + rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free); } else { for (i = 1; i < num; i++) { m = rte_pktmbuf_prefree_seg(txep[i]); diff --git a/drivers/net/intel/fm10k/fm10k_rxtx_vec.c b/drivers/net/intel/fm10k/fm10k_rxtx_vec.c index 715c891c304..0eada7275e1 100644 --- a/drivers/net/intel/fm10k/fm10k_rxtx_vec.c +++ b/drivers/net/intel/fm10k/fm10k_rxtx_vec.c @@ -259,7 +259,7 @@ fm10k_rxq_rearm(struct fm10k_rx_queue *rxq) rxdp = rxq->hw_ring + rxq->rxrearm_start; /* Pull 'n' more MBUFs into the software ring */ - if (rte_mempool_get_bulk(rxq->mp, + if (rte_mbuf_raw_alloc_bulk(rxq->mp, (void *)mb_alloc, RTE_FM10K_RXQ_REARM_THRESH) < 0) { dma_addr0 = _mm_setzero_si128(); @@ -775,14 +775,13 @@ fm10k_tx_free_bufs(struct fm10k_tx_queue *txq) if (likely(m->pool == free[0]->pool)) free[nb_free++] = m; else { - rte_mempool_put_bulk(free[0]->pool, - (void *)free, nb_free); + rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free); free[0] = m; nb_free = 1; } } } - rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); + rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free); } else { for (i = 1; i < n; i++) { m = rte_pktmbuf_prefree_seg(txep[i]); diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c index c3dd10e39b9..2bd09552252 100644 --- a/drivers/net/intel/i40e/i40e_rxtx.c +++ b/drivers/net/intel/i40e/i40e_rxtx.c @@ -595,7 +595,7 @@ i40e_rx_alloc_bufs(struct ci_rx_queue *rxq) alloc_idx = (uint16_t)(rxq->rx_free_trigger - (rxq->rx_free_thresh - 1)); rxep = &(rxq->sw_ring[alloc_idx]); - diag = rte_mempool_get_bulk(rxq->mp, (void *)rxep, + diag = rte_mbuf_raw_alloc_bulk(rxq->mp, (void *)rxep, rxq->rx_free_thresh); if (unlikely(diag != 0)) { PMD_DRV_LOG(ERR, "Failed to get mbufs in bulk"); @@ -1360,7 +1360,7 @@ i40e_tx_free_bufs(struct ci_tx_queue *txq) free[i] = txep->mbuf; txep->mbuf = NULL; } - rte_mempool_put_bulk(free[0]->pool, (void **)free, + rte_mbuf_raw_free_bulk(free[0]->pool, free, I40E_TX_MAX_FREE_BUF_SZ); } } @@ -1370,7 +1370,7 @@ i40e_tx_free_bufs(struct ci_tx_queue *txq) free[i] = txep->mbuf; txep->mbuf = NULL; } - rte_mempool_put_bulk(free[0]->pool, (void **)free, m); + rte_mbuf_raw_free_bulk(free[0]->pool, free, m); } } else { for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) { diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c index bf7d84db3bb..775fb4a66f4 100644 --- a/drivers/net/intel/iavf/iavf_rxtx.c +++ b/drivers/net/intel/iavf/iavf_rxtx.c @@ -2242,7 +2242,7 @@ iavf_rx_alloc_bufs(struct ci_rx_queue *rxq) alloc_idx = (uint16_t)(rxq->rx_free_trigger - (rxq->rx_free_thresh - 1)); rxep = &rxq->sw_ring[alloc_idx]; - diag = rte_mempool_get_bulk(rxq->mp, (void *)rxep, + diag = rte_mbuf_raw_alloc_bulk(rxq->mp, (void *)rxep, rxq->rx_free_thresh); if (unlikely(diag != 0)) { PMD_RX_LOG(ERR, "Failed to get mbufs in bulk"); diff --git a/drivers/net/intel/iavf/rte_pmd_iavf.h b/drivers/net/intel/iavf/rte_pmd_iavf.h index 56d453fc4c1..04b86a5dd78 100644 --- 
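
/*
 * The Tx completion paths above all share one batching idea: completed
 * mbufs are queued per mempool and released with a single bulk call,
 * flushing whenever the pool changes. A condensed sketch of that grouping
 * using the rte_mbuf_raw_free_bulk() API the series switches to (the
 * batch size of 64 is illustrative):
 */
#include <stdint.h>

#include <rte_common.h>
#include <rte_mbuf.h>

static void
tx_free_grouped_sketch(struct rte_mbuf **txe, uint32_t n)
{
	struct rte_mbuf *free[64];
	uint32_t nb_free = 0;
	uint32_t i;

	for (i = 0; i < n; i++) {
		/* prefree returns NULL while the mbuf is still referenced */
		struct rte_mbuf *m = rte_pktmbuf_prefree_seg(txe[i]);

		if (m == NULL)
			continue;
		if (nb_free == RTE_DIM(free) ||
				(nb_free > 0 && m->pool != free[0]->pool)) {
			rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free);
			nb_free = 0;
		}
		free[nb_free++] = m;
	}
	if (nb_free > 0)
		rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free);
}
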
a/drivers/net/intel/iavf/rte_pmd_iavf.h +++ b/drivers/net/intel/iavf/rte_pmd_iavf.h @@ -15,6 +15,7 @@ */ #include + #include #include #include @@ -184,6 +185,7 @@ __rte_experimental static inline void rte_pmd_ifd_dump_proto_xtr_metadata(struct rte_mbuf *m) { +#ifdef ALLOW_EXPERIMENTAL_API union rte_pmd_ifd_proto_xtr_metadata data; if (!rte_pmd_ifd_dynf_proto_xtr_metadata_avail()) @@ -243,6 +245,10 @@ rte_pmd_ifd_dump_proto_xtr_metadata(struct rte_mbuf *m) else if (m->ol_flags & RTE_IAVF_PKT_RX_DYNF_PROTO_XTR_IP_OFFSET) printf(" - Flexible descriptor's Extraction: ip_offset=%u", data.ip_ofs); +#else + RTE_SET_USED(m); + RTE_VERIFY(false); +#endif } #ifdef __cplusplus diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c index 60aae704701..411b353417d 100644 --- a/drivers/net/intel/ice/ice_rxtx.c +++ b/drivers/net/intel/ice/ice_rxtx.c @@ -2043,7 +2043,7 @@ ice_rx_alloc_bufs(struct ci_rx_queue *rxq) alloc_idx = (uint16_t)(rxq->rx_free_trigger - (rxq->rx_free_thresh - 1)); rxep = &rxq->sw_ring[alloc_idx]; - diag = rte_mempool_get_bulk(rxq->mp, (void *)rxep, + diag = rte_mbuf_raw_alloc_bulk(rxq->mp, (void *)rxep, rxq->rx_free_thresh); if (unlikely(diag != 0)) { PMD_RX_LOG(ERR, "Failed to get mbufs in bulk"); @@ -2051,10 +2051,10 @@ ice_rx_alloc_bufs(struct ci_rx_queue *rxq) } if (rxq->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) { - diag_pay = rte_mempool_get_bulk(rxq->rxseg[1].mp, + diag_pay = rte_mbuf_raw_alloc_bulk(rxq->rxseg[1].mp, (void *)rxq->sw_split_buf, rxq->rx_free_thresh); if (unlikely(diag_pay != 0)) { - rte_mempool_put_bulk(rxq->mp, (void *)rxep, + rte_mbuf_raw_free_bulk(rxq->mp, (void *)rxep, rxq->rx_free_thresh); PMD_RX_LOG(ERR, "Failed to get payload mbufs in bulk"); return -ENOMEM; diff --git a/drivers/net/intel/idpf/idpf_common_device.h b/drivers/net/intel/idpf/idpf_common_device.h index 5f3e4a4fcf7..3b95d519c67 100644 --- a/drivers/net/intel/idpf/idpf_common_device.h +++ b/drivers/net/intel/idpf/idpf_common_device.h @@ -44,6 +44,16 @@ (sizeof(struct virtchnl2_ptype) + \ (((p)->proto_id_count ? 
((p)->proto_id_count - 1) : 0) * sizeof((p)->proto_id[0]))) +enum idpf_rx_func_type { + IDPF_RX_DEFAULT, + IDPF_RX_SINGLEQ, + IDPF_RX_SINGLEQ_SCATTERED, + IDPF_RX_SINGLEQ_AVX2, + IDPF_RX_AVX512, + IDPF_RX_SINGLQ_AVX512, + IDPF_RX_MAX +}; + struct idpf_adapter { struct idpf_hw hw; struct virtchnl2_version_info virtchnl_version; @@ -59,6 +69,8 @@ struct idpf_adapter { /* For timestamp */ uint64_t time_hw; + + enum idpf_rx_func_type rx_func_type; }; struct idpf_chunks_info { @@ -121,12 +133,7 @@ struct idpf_vport { uint16_t devarg_id; - bool rx_vec_allowed; bool tx_vec_allowed; - bool rx_use_avx2; - bool tx_use_avx2; - bool rx_use_avx512; - bool tx_use_avx512; struct virtchnl2_vport_stats eth_stats_offset; diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c index eb25b091d86..a2b8c372d65 100644 --- a/drivers/net/intel/idpf/idpf_common_rxtx.c +++ b/drivers/net/intel/idpf/idpf_common_rxtx.c @@ -7,6 +7,8 @@ #include #include "idpf_common_rxtx.h" +#include "idpf_common_device.h" +#include "../common/rx.h" int idpf_timestamp_dynfield_offset = -1; uint64_t idpf_timestamp_dynflag; @@ -1622,3 +1624,52 @@ idpf_qc_splitq_rx_vec_setup(struct idpf_rx_queue *rxq) rxq->bufq2->idpf_ops = &def_rx_ops_vec; return idpf_rxq_vec_setup_default(rxq->bufq2); } + +RTE_EXPORT_INTERNAL_SYMBOL(idpf_rx_path_infos) +const struct ci_rx_path_info idpf_rx_path_infos[] = { + [IDPF_RX_DEFAULT] = { + .pkt_burst = idpf_dp_splitq_recv_pkts, + .info = "Split Scalar", + .features = { + .rx_offloads = IDPF_RX_SCALAR_OFFLOADS, + .simd_width = RTE_VECT_SIMD_DISABLED}}, + [IDPF_RX_SINGLEQ] = { + .pkt_burst = idpf_dp_singleq_recv_pkts, + .info = "Single Scalar", + .features = { + .rx_offloads = IDPF_RX_SCALAR_OFFLOADS, + .simd_width = RTE_VECT_SIMD_DISABLED, + .extra.single_queue = true}}, + [IDPF_RX_SINGLEQ_SCATTERED] = { + .pkt_burst = idpf_dp_singleq_recv_scatter_pkts, + .info = "Single Scalar Scattered", + .features = { + .rx_offloads = IDPF_RX_SCALAR_OFFLOADS, + .simd_width = RTE_VECT_SIMD_DISABLED, + .extra.scattered = true, + .extra.single_queue = true}}, +#ifdef RTE_ARCH_X86 + [IDPF_RX_SINGLEQ_AVX2] = { + .pkt_burst = idpf_dp_singleq_recv_pkts_avx2, + .info = "Single AVX2 Vector", + .features = { + .rx_offloads = IDPF_RX_VECTOR_OFFLOADS, + .simd_width = RTE_VECT_SIMD_256, + .extra.single_queue = true}}, +#ifdef CC_AVX512_SUPPORT + [IDPF_RX_AVX512] = { + .pkt_burst = idpf_dp_splitq_recv_pkts_avx512, + .info = "Split AVX512 Vector", + .features = { + .rx_offloads = IDPF_RX_VECTOR_OFFLOADS, + .simd_width = RTE_VECT_SIMD_512}}, + [IDPF_RX_SINGLQ_AVX512] = { + .pkt_burst = idpf_dp_singleq_recv_pkts_avx512, + .info = "Single AVX512 Vector", + .features = { + .rx_offloads = IDPF_RX_VECTOR_OFFLOADS, + .simd_width = RTE_VECT_SIMD_512, + .extra.single_queue = true}}, +#endif /* CC_AVX512_SUPPORT */ +#endif /* RTE_ARCH_X86 */ +}; diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.h b/drivers/net/intel/idpf/idpf_common_rxtx.h index f84a7603347..3bc3323af4f 100644 --- a/drivers/net/intel/idpf/idpf_common_rxtx.h +++ b/drivers/net/intel/idpf/idpf_common_rxtx.h @@ -11,6 +11,7 @@ #include "idpf_common_device.h" #include "../common/tx.h" +#include "../common/rx.h" #define IDPF_RX_MAX_BURST 32 @@ -96,6 +97,15 @@ #define IDPF_RX_SPLIT_BUFQ1_ID 1 #define IDPF_RX_SPLIT_BUFQ2_ID 2 +#define IDPF_RX_SCALAR_OFFLOADS ( \ + RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \ + RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \ + RTE_ETH_RX_OFFLOAD_TCP_CKSUM | \ + RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | \ + RTE_ETH_RX_OFFLOAD_TIMESTAMP | \ + 
RTE_ETH_RX_OFFLOAD_SCATTER) +#define IDPF_RX_VECTOR_OFFLOADS 0 + struct idpf_rx_stats { RTE_ATOMIC(uint64_t) mbuf_alloc_failed; }; @@ -253,4 +263,6 @@ uint16_t idpf_dp_singleq_xmit_pkts_avx2(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +extern const struct ci_rx_path_info idpf_rx_path_infos[IDPF_RX_MAX]; + #endif /* _IDPF_COMMON_RXTX_H_ */ diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c index 1babc5114b7..21c8f79254f 100644 --- a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c +++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c @@ -20,7 +20,7 @@ idpf_singleq_rx_rearm(struct idpf_rx_queue *rxq) rxdp += rxq->rxrearm_start; /* Pull 'n' more MBUFs into the software ring */ - if (rte_mempool_get_bulk(rxq->mp, + if (rte_mbuf_raw_alloc_bulk(rxq->mp, (void *)rxep, IDPF_RXQ_REARM_THRESH) < 0) { if (rxq->rxrearm_nb + IDPF_RXQ_REARM_THRESH >= diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c index 06e73c87254..bc2cadd738e 100644 --- a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c +++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c @@ -22,7 +22,7 @@ idpf_singleq_rearm_common(struct idpf_rx_queue *rxq) rxdp += rxq->rxrearm_start; /* Pull 'n' more MBUFs into the software ring */ - if (rte_mempool_get_bulk(rxq->mp, + if (rte_mbuf_raw_alloc_bulk(rxq->mp, (void *)rxp, IDPF_RXQ_REARM_THRESH) < 0) { if (rxq->rxrearm_nb + IDPF_RXQ_REARM_THRESH >= @@ -551,7 +551,7 @@ idpf_splitq_rearm_common(struct idpf_rx_queue *rx_bufq) rxdp += rx_bufq->rxrearm_start; /* Pull 'n' more MBUFs into the software ring */ - if (rte_mempool_get_bulk(rx_bufq->mp, + if (rte_mbuf_raw_alloc_bulk(rx_bufq->mp, (void *)rxp, IDPF_RXQ_REARM_THRESH) < 0) { if (rx_bufq->rxrearm_nb + IDPF_RXQ_REARM_THRESH >= diff --git a/drivers/net/intel/idpf/idpf_ethdev.c b/drivers/net/intel/idpf/idpf_ethdev.c index 90720909bfe..c04842c9dfc 100644 --- a/drivers/net/intel/idpf/idpf_ethdev.c +++ b/drivers/net/intel/idpf/idpf_ethdev.c @@ -694,6 +694,8 @@ idpf_dev_configure(struct rte_eth_dev *dev) (dev->data->mtu == 0) ? 
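
/*
 * A simplified sketch of how a table like idpf_rx_path_infos is consumed
 * by the common selector: a path qualifies only if it supports every
 * requested offload, fits under the allowed SIMD width and matches the
 * single_queue/scattered flags, and the widest qualifying path wins.
 * Because IDPF_RX_VECTOR_OFFLOADS is 0, any requested offload steers the
 * selection to a scalar path. Types and names here are illustrative, not
 * the exact common-Rx API.
 */
#include <stdbool.h>
#include <stdint.h>

struct rx_path_sketch {
	uint64_t rx_offloads;	/* offloads this burst function can handle */
	int simd_width;		/* 0 for scalar paths */
	bool single_queue;
	bool scattered;
};

static int
select_rx_path_sketch(const struct rx_path_sketch *tbl, int num,
		uint64_t req_offloads, int max_simd,
		bool single_queue, bool scattered, int dflt)
{
	int best = dflt;
	int i;

	for (i = 0; i < num; i++) {
		if ((req_offloads & ~tbl[i].rx_offloads) != 0)
			continue;	/* a requested offload is unsupported */
		if (tbl[i].simd_width > max_simd)
			continue;
		if (tbl[i].single_queue != single_queue ||
				tbl[i].scattered != scattered)
			continue;
		if (tbl[i].simd_width >= tbl[best].simd_width)
			best = i;
	}
	return best;
}
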
IDPF_DEFAULT_MTU : dev->data->mtu + IDPF_ETH_OVERHEAD; + vport->adapter->rx_func_type = IDPF_RX_DEFAULT; + return 0; } diff --git a/drivers/net/intel/idpf/idpf_rxtx.c b/drivers/net/intel/idpf/idpf_rxtx.c index 5510cbd30a8..1c725065df0 100644 --- a/drivers/net/intel/idpf/idpf_rxtx.c +++ b/drivers/net/intel/idpf/idpf_rxtx.c @@ -760,110 +760,50 @@ void idpf_set_rx_function(struct rte_eth_dev *dev) { struct idpf_vport *vport = dev->data->dev_private; + struct idpf_adapter *ad = vport->adapter; + struct ci_rx_path_features req_features = { + .rx_offloads = dev->data->dev_conf.rxmode.offloads, + .simd_width = RTE_VECT_SIMD_DISABLED, + }; #ifdef RTE_ARCH_X86 struct idpf_rx_queue *rxq; int i; if (idpf_rx_vec_dev_check_default(dev) == IDPF_VECTOR_PATH && - rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) { - vport->rx_vec_allowed = true; + rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) + req_features.simd_width = idpf_get_max_simd_bitwidth(); +#endif /* RTE_ARCH_X86 */ - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 && - rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) - vport->rx_use_avx2 = true; + req_features.extra.single_queue = (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE); + req_features.extra.scattered = dev->data->scattered_rx; - if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) -#ifdef CC_AVX512_SUPPORT - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 && - rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 && - rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512DQ)) - vport->rx_use_avx512 = true; -#else - PMD_DRV_LOG(NOTICE, - "AVX512 is not supported in build env"); -#endif /* CC_AVX512_SUPPORT */ - } else { - vport->rx_vec_allowed = false; - } -#endif /* RTE_ARCH_X86 */ + ad->rx_func_type = ci_rx_path_select(req_features, + &idpf_rx_path_infos[0], + IDPF_RX_MAX, + IDPF_RX_DEFAULT); #ifdef RTE_ARCH_X86 - if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { - if (vport->rx_vec_allowed) { + if (idpf_rx_path_infos[ad->rx_func_type].features.simd_width >= RTE_VECT_SIMD_256) { + /* Vector function selected. Prepare the rxq accordingly. 
*/ + if (idpf_rx_path_infos[ad->rx_func_type].features.extra.single_queue) { for (i = 0; i < dev->data->nb_rx_queues; i++) { - rxq = dev->data->rx_queues[i]; - (void)idpf_qc_splitq_rx_vec_setup(rxq); - } -#ifdef CC_AVX512_SUPPORT - if (vport->rx_use_avx512) { - PMD_DRV_LOG(NOTICE, - "Using Split AVX512 Vector Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_splitq_recv_pkts_avx512; - return; - } -#endif /* CC_AVX512_SUPPORT */ - } - PMD_DRV_LOG(NOTICE, - "Using Split Scalar Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_splitq_recv_pkts; - } else { - if (vport->rx_vec_allowed) { - for (i = 0; i < dev->data->nb_tx_queues; i++) { rxq = dev->data->rx_queues[i]; (void)idpf_qc_singleq_rx_vec_setup(rxq); } -#ifdef CC_AVX512_SUPPORT - if (vport->rx_use_avx512) { - PMD_DRV_LOG(NOTICE, - "Using Single AVX512 Vector Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts_avx512; - return; - } -#endif /* CC_AVX512_SUPPORT */ - if (vport->rx_use_avx2) { - PMD_DRV_LOG(NOTICE, - "Using Single AVX2 Vector Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts_avx2; - return; + } else { + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + (void)idpf_qc_splitq_rx_vec_setup(rxq); } } - if (dev->data->scattered_rx) { - PMD_DRV_LOG(NOTICE, - "Using Single Scalar Scatterd Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_scatter_pkts; - return; - } - PMD_DRV_LOG(NOTICE, - "Using Single Scalar Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts; - } -#else - if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { - PMD_DRV_LOG(NOTICE, - "Using Split Scalar Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_splitq_recv_pkts; - } else { - if (dev->data->scattered_rx) { - PMD_DRV_LOG(NOTICE, - "Using Single Scalar Scatterd Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_scatter_pkts; - return; - } - PMD_DRV_LOG(NOTICE, - "Using Single Scalar Rx (port %d).", - dev->data->port_id); - dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts; } -#endif /* RTE_ARCH_X86 */ +#endif + + dev->rx_pkt_burst = idpf_rx_path_infos[ad->rx_func_type].pkt_burst; + PMD_DRV_LOG(NOTICE, "Using %s Rx (port %d).", + idpf_rx_path_infos[ad->rx_func_type].info, dev->data->port_id); + } void @@ -871,6 +811,7 @@ idpf_set_tx_function(struct rte_eth_dev *dev) { struct idpf_vport *vport = dev->data->dev_private; #ifdef RTE_ARCH_X86 + enum rte_vect_max_simd tx_simd_width = RTE_VECT_SIMD_DISABLED; #ifdef CC_AVX512_SUPPORT struct ci_tx_queue *txq; int i; @@ -879,22 +820,12 @@ idpf_set_tx_function(struct rte_eth_dev *dev) if (idpf_tx_vec_dev_check_default(dev) == IDPF_VECTOR_PATH && rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) { vport->tx_vec_allowed = true; - - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 && - rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) - vport->tx_use_avx2 = true; - - if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) + tx_simd_width = idpf_get_max_simd_bitwidth(); #ifdef CC_AVX512_SUPPORT - { - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 && - rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1) - vport->tx_use_avx512 = true; - if (vport->tx_use_avx512) { - for (i = 0; i < dev->data->nb_tx_queues; i++) { - txq = dev->data->tx_queues[i]; - idpf_qc_tx_vec_avx512_setup(txq); - } + if (tx_simd_width == RTE_VECT_SIMD_512) { + for (i = 0; i < 
dev->data->nb_tx_queues; i++) { + txq = dev->data->tx_queues[i]; + idpf_qc_tx_vec_avx512_setup(txq); } } #else @@ -910,7 +841,7 @@ idpf_set_tx_function(struct rte_eth_dev *dev) if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { if (vport->tx_vec_allowed) { #ifdef CC_AVX512_SUPPORT - if (vport->tx_use_avx512) { + if (tx_simd_width == RTE_VECT_SIMD_512) { PMD_DRV_LOG(NOTICE, "Using Split AVX512 Vector Tx (port %d).", dev->data->port_id); @@ -928,7 +859,7 @@ idpf_set_tx_function(struct rte_eth_dev *dev) } else { if (vport->tx_vec_allowed) { #ifdef CC_AVX512_SUPPORT - if (vport->tx_use_avx512) { + if (tx_simd_width == RTE_VECT_SIMD_512) { for (i = 0; i < dev->data->nb_tx_queues; i++) { txq = dev->data->tx_queues[i]; if (txq == NULL) @@ -943,7 +874,7 @@ idpf_set_tx_function(struct rte_eth_dev *dev) return; } #endif /* CC_AVX512_SUPPORT */ - if (vport->tx_use_avx2) { + if (tx_simd_width == RTE_VECT_SIMD_256) { PMD_DRV_LOG(NOTICE, "Using Single AVX2 Vector Tx (port %d).", dev->data->port_id); diff --git a/drivers/net/intel/idpf/idpf_rxtx_vec_common.h b/drivers/net/intel/idpf/idpf_rxtx_vec_common.h index ff3ae56baf0..ecdf2f0e23f 100644 --- a/drivers/net/intel/idpf/idpf_rxtx_vec_common.h +++ b/drivers/net/intel/idpf/idpf_rxtx_vec_common.h @@ -11,6 +11,9 @@ #include "idpf_ethdev.h" #include "idpf_rxtx.h" #include "../common/rx.h" +#ifdef RTE_ARCH_X86 +#include "../common/rx_vec_x86.h" +#endif #define IDPF_SCALAR_PATH 0 #define IDPF_VECTOR_PATH 1 @@ -129,4 +132,12 @@ idpf_tx_vec_dev_check_default(struct rte_eth_dev *dev) return IDPF_VECTOR_PATH; } +#ifdef RTE_ARCH_X86 +static inline enum rte_vect_max_simd +idpf_get_max_simd_bitwidth(void) +{ + return ci_get_x86_max_simd_bitwidth(); +} +#endif + #endif /*_IDPF_RXTX_VEC_COMMON_H_*/ diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c index c5242c4171a..897ee2b671a 100644 --- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c @@ -141,8 +141,7 @@ ixgbe_tx_free_bufs(struct ci_tx_queue *txq) if (nb_free >= IXGBE_TX_MAX_FREE_BUF_SZ || (nb_free > 0 && m->pool != free[0]->pool)) { - rte_mempool_put_bulk(free[0]->pool, - (void **)free, nb_free); + rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free); nb_free = 0; } @@ -150,7 +149,7 @@ ixgbe_tx_free_bufs(struct ci_tx_queue *txq) } if (nb_free > 0) - rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); + rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free); /* buffers were freed, update counters */ txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh); @@ -1659,7 +1658,7 @@ ixgbe_rx_alloc_bufs(struct ci_rx_queue *rxq, bool reset_mbuf) /* allocate buffers in bulk directly into the S/W ring */ alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1); rxep = &rxq->sw_ring[alloc_idx]; - diag = rte_mempool_get_bulk(rxq->mp, (void *)rxep, + diag = rte_mbuf_raw_alloc_bulk(rxq->mp, (void *)rxep, rxq->rx_free_thresh); if (unlikely(diag != 0)) return -ENOMEM; diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h index e54f5324973..7fb3c3dc249 100644 --- a/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h +++ b/drivers/net/intel/ixgbe/ixgbe_rxtx_vec_common.h @@ -55,14 +55,13 @@ ixgbe_tx_free_bufs_vec(struct ci_tx_queue *txq) if (likely(m->pool == free[0]->pool)) free[nb_free++] = m; else { - rte_mempool_put_bulk(free[0]->pool, - (void *)free, nb_free); + rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free); free[0] = m; nb_free = 1; } } } - rte_mempool_put_bulk(free[0]->pool, 
(void **)free, nb_free); + rte_mbuf_raw_free_bulk(free[0]->pool, free, nb_free); } else { for (i = 1; i < n; i++) { m = rte_pktmbuf_prefree_seg(txep[i].mbuf); diff --git a/drivers/net/mana/meson.build b/drivers/net/mana/meson.build index e320da7fc44..19d4b3695ea 100644 --- a/drivers/net/mana/meson.build +++ b/drivers/net/mana/meson.build @@ -27,7 +27,7 @@ foreach libname:libnames if not lib.found() lib = cc.find_library(libname, required:false) endif - if lib.found() + if lib.found() and cc.links(min_c_code, dependencies: lib) libs += lib ext_deps += lib else diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build index 869d2895c84..1eb67f3c472 100644 --- a/drivers/net/mlx4/meson.build +++ b/drivers/net/mlx4/meson.build @@ -31,7 +31,7 @@ foreach libname:libnames if not lib.found() and not static_ibverbs lib = cc.find_library(libname, required:false) endif - if lib.found() + if lib.found() and cc.links(min_c_code, dependencies: lib) libs += lib if not static_ibverbs and not dlopen_ibverbs ext_deps += lib diff --git a/drivers/net/mlx5/rte_pmd_mlx5.h b/drivers/net/mlx5/rte_pmd_mlx5.h index fdd2f658887..f2c6aebe0b2 100644 --- a/drivers/net/mlx5/rte_pmd_mlx5.h +++ b/drivers/net/mlx5/rte_pmd_mlx5.h @@ -5,6 +5,9 @@ #ifndef RTE_PMD_PRIVATE_MLX5_H_ #define RTE_PMD_PRIVATE_MLX5_H_ +#include + +#include #include /** diff --git a/drivers/net/pcap/meson.build b/drivers/net/pcap/meson.build index 676c55018e6..f30fc8338b2 100644 --- a/drivers/net/pcap/meson.build +++ b/drivers/net/pcap/meson.build @@ -13,7 +13,11 @@ sources = files( ext_deps += pcap_dep if is_windows - ext_deps += cc.find_library('iphlpapi', required: true) + iphlpapi_dep = cc.find_library('iphlpapi', required: true) + if not cc.links(min_c_code, dependencies: iphlpapi_dep) + error('broken dependency, "iphlpapi"') + endif + ext_deps += iphlpapi_dep endif require_iova_in_mbuf = false diff --git a/drivers/net/txgbe/meson.build b/drivers/net/txgbe/meson.build index 4dbbf597bbc..5cdec017ed7 100644 --- a/drivers/net/txgbe/meson.build +++ b/drivers/net/txgbe/meson.build @@ -31,4 +31,4 @@ elif arch_subdir == 'arm' sources += files('txgbe_rxtx_vec_neon.c') endif -install_headers('rte_pmd_txgbe.h') +headers = files('rte_pmd_txgbe.h') diff --git a/drivers/net/xsc/xsc_vfio.c b/drivers/net/xsc/xsc_vfio.c index c233728c5f6..21779718270 100644 --- a/drivers/net/xsc/xsc_vfio.c +++ b/drivers/net/xsc/xsc_vfio.c @@ -2,11 +2,12 @@ * Copyright 2025 Yunsilicon Technology Co., Ltd. 
*/ +#include + #include #include #include #include -#include #include #include diff --git a/drivers/power/amd_uncore/meson.build b/drivers/power/amd_uncore/meson.build index 8cbab47b01f..cab260b64eb 100644 --- a/drivers/power/amd_uncore/meson.build +++ b/drivers/power/amd_uncore/meson.build @@ -9,7 +9,7 @@ endif ESMI_header = '#include' lib = cc.find_library('e_smi64', required: false) -if not lib.found() +if not lib.found() or not cc.links(min_c_code, dependencies: lib) build = false reason = 'missing dependency, "libe_smi"' else diff --git a/drivers/raw/dpaa2_cmdif/rte_pmd_dpaa2_cmdif.h b/drivers/raw/dpaa2_cmdif/rte_pmd_dpaa2_cmdif.h index 483b66eaae6..7731fc63635 100644 --- a/drivers/raw/dpaa2_cmdif/rte_pmd_dpaa2_cmdif.h +++ b/drivers/raw/dpaa2_cmdif/rte_pmd_dpaa2_cmdif.h @@ -12,6 +12,8 @@ * */ +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/drivers/raw/ifpga/afu_pmd_he_hssi.c b/drivers/raw/ifpga/afu_pmd_he_hssi.c index 859f28dcc1f..bcb952935a0 100644 --- a/drivers/raw/ifpga/afu_pmd_he_hssi.c +++ b/drivers/raw/ifpga/afu_pmd_he_hssi.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/raw/ifpga/afu_pmd_he_lpbk.c b/drivers/raw/ifpga/afu_pmd_he_lpbk.c index c7c5cda48c3..df3b093157c 100644 --- a/drivers/raw/ifpga/afu_pmd_he_lpbk.c +++ b/drivers/raw/ifpga/afu_pmd_he_lpbk.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/raw/ifpga/afu_pmd_he_mem.c b/drivers/raw/ifpga/afu_pmd_he_mem.c index a1db533eeb9..b595cd729ca 100644 --- a/drivers/raw/ifpga/afu_pmd_he_mem.c +++ b/drivers/raw/ifpga/afu_pmd_he_mem.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/raw/ifpga/afu_pmd_n3000.c b/drivers/raw/ifpga/afu_pmd_n3000.c index 6aae1b224e9..b4c2f0d0a87 100644 --- a/drivers/raw/ifpga/afu_pmd_n3000.c +++ b/drivers/raw/ifpga/afu_pmd_n3000.c @@ -2,6 +2,8 @@ * Copyright(c) 2022 Intel Corporation */ +#include + #include #include #include @@ -17,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -1473,7 +1474,6 @@ static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_rawdev *dev) return RTE_DEV_TO_PCI(afudev->rawdev->device); } -#ifdef VFIO_PRESENT static int dma_afu_set_irqs(struct afu_rawdev *dev, uint32_t vec_start, uint32_t count, int *efds) { @@ -1511,7 +1511,6 @@ static int dma_afu_set_irqs(struct afu_rawdev *dev, uint32_t vec_start, rte_free(irq_set); return ret; } -#endif static void *n3000_afu_get_port_addr(struct afu_rawdev *dev) { @@ -1724,10 +1723,8 @@ static int dma_afu_ctx_init(struct afu_rawdev *dev, int index, uint8_t *addr) IFPGA_RAWDEV_PMD_ERR("eventfd create failed"); return -EBADF; } -#ifdef VFIO_PRESENT if (dma_afu_set_irqs(dev, vec_start, 1, efds)) IFPGA_RAWDEV_PMD_ERR("DMA interrupt setup failed"); -#endif } ctx->event_fd = efds[0]; diff --git a/drivers/raw/ifpga/base/ifpga_feature_dev.c b/drivers/raw/ifpga/base/ifpga_feature_dev.c index 0a00af1b6b7..1493d87ea49 100644 --- a/drivers/raw/ifpga/base/ifpga_feature_dev.c +++ b/drivers/raw/ifpga/base/ifpga_feature_dev.c @@ -2,8 +2,9 @@ * Copyright(c) 2010-2018 Intel Corporation */ +#include + #include -#include #include "ifpga_feature_dev.h" diff --git a/drivers/raw/ifpga/meson.build b/drivers/raw/ifpga/meson.build index 4295ec04bdc..395894af2ad 100644 --- a/drivers/raw/ifpga/meson.build +++ b/drivers/raw/ifpga/meson.build @@ -11,7 +11,7 @@ rtdep = dependency('librt', required: false) if not rtdep.found() rtdep = cc.find_library('rt', 
required: false) endif -if not rtdep.found() +if not rtdep.found() or not cc.links(min_c_code, dependencies: rtdep) build = false reason = 'missing dependency, "librt"' subdir_done() diff --git a/drivers/raw/ntb/rte_pmd_ntb.h b/drivers/raw/ntb/rte_pmd_ntb.h index 6591ce79319..76da3be0262 100644 --- a/drivers/raw/ntb/rte_pmd_ntb.h +++ b/drivers/raw/ntb/rte_pmd_ntb.h @@ -5,6 +5,8 @@ #ifndef _RTE_PMD_NTB_H_ #define _RTE_PMD_NTB_H_ +#include + /* App needs to set/get these attrs */ #define NTB_QUEUE_SZ_NAME "queue_size" #define NTB_QUEUE_NUM_NAME "queue_num" diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 65de383b958..f319d455ba6 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -2,6 +2,8 @@ * Copyright(c) 2018 Intel Corporation */ +#include + #include #include #include diff --git a/drivers/vdpa/nfp/nfp_vdpa.c b/drivers/vdpa/nfp/nfp_vdpa.c index 7f2f21ec6c5..f4fd5c92ec9 100644 --- a/drivers/vdpa/nfp/nfp_vdpa.c +++ b/drivers/vdpa/nfp/nfp_vdpa.c @@ -3,6 +3,8 @@ * All rights reserved. */ +#include + #include #include #include diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.c b/drivers/vdpa/sfc/sfc_vdpa_ops.c index 00f9a4b04c7..84ba141b348 100644 --- a/drivers/vdpa/sfc/sfc_vdpa_ops.c +++ b/drivers/vdpa/sfc/sfc_vdpa_ops.c @@ -2,6 +2,8 @@ * Copyright(c) 2020-2021 Xilinx, Inc. */ +#include + #include #include #include @@ -9,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/examples/l2fwd-cat/meson.build b/examples/l2fwd-cat/meson.build index ebcc3f66f23..5e8ff3a56ce 100644 --- a/examples/l2fwd-cat/meson.build +++ b/examples/l2fwd-cat/meson.build @@ -7,7 +7,7 @@ # DPDK instance, use 'make' pqos = cc.find_library('pqos', required: false) -build = pqos.found() +build = (pqos.found() and cc.links(min_c_code, dependencies: pqos)) if not build subdir_done() endif diff --git a/examples/l2fwd-keepalive/meson.build b/examples/l2fwd-keepalive/meson.build index ed49c612387..e3f72aedb85 100644 --- a/examples/l2fwd-keepalive/meson.build +++ b/examples/l2fwd-keepalive/meson.build @@ -8,7 +8,7 @@ allow_experimental_apis = true librt = cc.find_library('rt', required: false) -if not librt.found() +if not librt.found() or not cc.links(min_c_code, dependencies: librt) build = false subdir_done() endif diff --git a/examples/vm_power_manager/guest_cli/meson.build b/examples/vm_power_manager/guest_cli/meson.build index 00bc32526d8..6a6809240e1 100644 --- a/examples/vm_power_manager/guest_cli/meson.build +++ b/examples/vm_power_manager/guest_cli/meson.build @@ -17,5 +17,5 @@ sources = files( allow_experimental_apis = true opt_dep = cc.find_library('virt', required : false) -build = opt_dep.found() +build = (opt_dep.found() and cc.links(min_c_code, dependencies: opt_dep)) ext_deps += opt_dep diff --git a/examples/vm_power_manager/meson.build b/examples/vm_power_manager/meson.build index dcf23198eb2..53949ce6d8a 100644 --- a/examples/vm_power_manager/meson.build +++ b/examples/vm_power_manager/meson.build @@ -38,7 +38,7 @@ else endif opt_dep = cc.find_library('virt', required : false) -build = opt_dep.found() +build = (opt_dep.found() and cc.links(min_c_code, dependencies: opt_dep)) ext_deps += opt_dep opt_dep = dependency('jansson', required : false, method: 'pkg-config') diff --git a/kernel/linux/uapi/linux/vduse.h b/kernel/linux/uapi/linux/vduse.h index 68a627d04af..f46269af349 100644 --- a/kernel/linux/uapi/linux/vduse.h +++ b/kernel/linux/uapi/linux/vduse.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH 
Linux-syscall-note) OR BSD-3-Clause) */ -#ifndef _UAPI_VDUSE_H_ -#define _UAPI_VDUSE_H_ +#ifndef _VDUSE_H_ +#define _VDUSE_H_ #include @@ -350,4 +350,4 @@ struct vduse_dev_response { }; }; -#endif /* _UAPI_VDUSE_H_ */ +#endif /* _VDUSE_H_ */ diff --git a/kernel/linux/uapi/linux/vfio.h b/kernel/linux/uapi/linux/vfio.h new file mode 100644 index 00000000000..79bf8c0cc5e --- /dev/null +++ b/kernel/linux/uapi/linux/vfio.h @@ -0,0 +1,1836 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * VFIO API definition + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef VFIO_H +#define VFIO_H + +#include +#include + +#define VFIO_API_VERSION 0 + + +/* Kernel & User level defines for VFIO IOCTLs. */ + +/* Extensions */ + +#define VFIO_TYPE1_IOMMU 1 +#define VFIO_SPAPR_TCE_IOMMU 2 +#define VFIO_TYPE1v2_IOMMU 3 +/* + * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping). This + * capability is subject to change as groups are added or removed. + */ +#define VFIO_DMA_CC_IOMMU 4 + +/* Check if EEH is supported */ +#define VFIO_EEH 5 + +/* Two-stage IOMMU */ +#define __VFIO_RESERVED_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ + +#define VFIO_SPAPR_TCE_v2_IOMMU 7 + +/* + * The No-IOMMU IOMMU offers no translation or isolation for devices and + * supports no ioctls outside of VFIO_CHECK_EXTENSION. Use of VFIO's No-IOMMU + * code will taint the host kernel and should be used with extreme caution. + */ +#define VFIO_NOIOMMU_IOMMU 8 + +/* Supports VFIO_DMA_UNMAP_FLAG_ALL */ +#define VFIO_UNMAP_ALL 9 + +/* + * Supports the vaddr flag for DMA map and unmap. Not supported for mediated + * devices, so this capability is subject to change as groups are added or + * removed. + */ +#define VFIO_UPDATE_VADDR 10 + +/* + * The IOCTL interface is designed for extensibility by embedding the + * structure length (argsz) and flags into structures passed between + * kernel and userspace. We therefore use the _IO() macro for these + * defines to avoid implicitly embedding a size into the ioctl request. + * As structure fields are added, argsz will increase to match and flag + * bits will be defined to indicate additional fields with valid data. + * It's *always* the caller's responsibility to indicate the size of + * the structure passed by setting argsz appropriately. + */ + +#define VFIO_TYPE (';') +#define VFIO_BASE 100 + +/* + * For extension of INFO ioctls, VFIO makes use of a capability chain + * designed after PCI/e capabilities. A flag bit indicates whether + * this capability chain is supported and a field defined in the fixed + * structure defines the offset of the first capability in the chain. + * This field is only valid when the corresponding bit in the flags + * bitmap is set. This offset field is relative to the start of the + * INFO buffer, as is the next field within each capability header. + * The id within the header is a shared address space per INFO ioctl, + * while the version field is specific to the capability id. The + * contents following the header are specific to the capability id. 
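+ *
+ * An illustrative sketch (not part of the kernel ABI text) of walking
+ * this chain from userspace, given the INFO buffer and the cap_offset
+ * reported in the fixed structure:
+ *
+ *	struct vfio_info_cap_header *hdr;
+ *	__u32 off = first_cap_offset;
+ *
+ *	while (off != 0) {
+ *		hdr = (struct vfio_info_cap_header *)((char *)info_buf + off);
+ *		if (hdr->id == wanted_id)
+ *			break;
+ *		off = hdr->next;
+ *	}
+ *
+ * first_cap_offset, info_buf and wanted_id are placeholder names; offsets
+ * are always relative to the start of the INFO buffer, and a next value
+ * of zero terminates the chain.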
+ */ +struct vfio_info_cap_header { + __u16 id; /* Identifies capability */ + __u16 version; /* Version specific to the capability ID */ + __u32 next; /* Offset of next capability */ +}; + +/* + * Callers of INFO ioctls passing insufficiently sized buffers will see + * the capability chain flag bit set, a zero value for the first capability + * offset (if available within the provided argsz), and argsz will be + * updated to report the necessary buffer size. For compatibility, the + * INFO ioctl will not report error in this case, but the capability chain + * will not be available. + */ + +/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */ + +/** + * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0) + * + * Report the version of the VFIO API. This allows us to bump the entire + * API version should we later need to add or change features in incompatible + * ways. + * Return: VFIO_API_VERSION + * Availability: Always + */ +#define VFIO_GET_API_VERSION _IO(VFIO_TYPE, VFIO_BASE + 0) + +/** + * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32) + * + * Check whether an extension is supported. + * Return: 0 if not supported, 1 (or some other positive integer) if supported. + * Availability: Always + */ +#define VFIO_CHECK_EXTENSION _IO(VFIO_TYPE, VFIO_BASE + 1) + +/** + * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32) + * + * Set the iommu to the given type. The type must be supported by an + * iommu driver as verified by calling CHECK_EXTENSION using the same + * type. A group must be set to this file descriptor before this + * ioctl is available. The IOMMU interfaces enabled by this call are + * specific to the value set. + * Return: 0 on success, -errno on failure + * Availability: When VFIO group attached + */ +#define VFIO_SET_IOMMU _IO(VFIO_TYPE, VFIO_BASE + 2) + +/* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */ + +/** + * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3, + * struct vfio_group_status) + * + * Retrieve information about the group. Fills in provided + * struct vfio_group_info. Caller sets argsz. + * Return: 0 on succes, -errno on failure. + * Availability: Always + */ +struct vfio_group_status { + __u32 argsz; + __u32 flags; +#define VFIO_GROUP_FLAGS_VIABLE (1 << 0) +#define VFIO_GROUP_FLAGS_CONTAINER_SET (1 << 1) +}; +#define VFIO_GROUP_GET_STATUS _IO(VFIO_TYPE, VFIO_BASE + 3) + +/** + * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32) + * + * Set the container for the VFIO group to the open VFIO file + * descriptor provided. Groups may only belong to a single + * container. Containers may, at their discretion, support multiple + * groups. Only when a container is set are all of the interfaces + * of the VFIO file descriptor and the VFIO group file descriptor + * available to the user. + * Return: 0 on success, -errno on failure. + * Availability: Always + */ +#define VFIO_GROUP_SET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 4) + +/** + * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5) + * + * Remove the group from the attached container. This is the + * opposite of the SET_CONTAINER call and returns the group to + * an initial state. All device file descriptors must be released + * prior to calling this interface. When removing the last group + * from a container, the IOMMU will be disabled and all state lost, + * effectively also returning the VFIO file descriptor to an initial + * state. + * Return: 0 on success, -errno on failure. 
+ * Availability: When attached to container + */ +#define VFIO_GROUP_UNSET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 5) + +/** + * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char) + * + * Return a new file descriptor for the device object described by + * the provided string. The string should match a device listed in + * the devices subdirectory of the IOMMU group sysfs entry. The + * group containing the device must already be added to this context. + * Return: new file descriptor on success, -errno on failure. + * Availability: When attached to container + */ +#define VFIO_GROUP_GET_DEVICE_FD _IO(VFIO_TYPE, VFIO_BASE + 6) + +/* --------------- IOCTLs for DEVICE file descriptors --------------- */ + +/** + * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7, + * struct vfio_device_info) + * + * Retrieve information about the device. Fills in provided + * struct vfio_device_info. Caller sets argsz. + * Return: 0 on success, -errno on failure. + */ +struct vfio_device_info { + __u32 argsz; + __u32 flags; +#define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */ +#define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ +#define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ +#define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ +#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ +#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ +#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ +#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ +#define VFIO_DEVICE_FLAGS_CDX (1 << 8) /* vfio-cdx device */ + __u32 num_regions; /* Max region index + 1 */ + __u32 num_irqs; /* Max IRQ index + 1 */ + __u32 cap_offset; /* Offset within info struct of first cap */ + __u32 pad; +}; +#define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) + +/* + * Vendor driver using Mediated device framework should provide device_api + * attribute in supported type attribute groups. Device API string should be one + * of the following corresponding to device flags in vfio_device_info structure. + */ + +#define VFIO_DEVICE_API_PCI_STRING "vfio-pci" +#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" +#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" +#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" +#define VFIO_DEVICE_API_AP_STRING "vfio-ap" + +/* + * The following capabilities are unique to s390 zPCI devices. Their contents + * are further-defined in vfio_zdev.h + */ +#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1 +#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2 +#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 +#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 + +/* + * The following VFIO_DEVICE_INFO capability reports support for PCIe AtomicOp + * completion to the root bus with supported widths provided via flags. + */ +#define VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP 5 +struct vfio_device_info_cap_pci_atomic_comp { + struct vfio_info_cap_header header; + __u32 flags; +#define VFIO_PCI_ATOMIC_COMP32 (1 << 0) +#define VFIO_PCI_ATOMIC_COMP64 (1 << 1) +#define VFIO_PCI_ATOMIC_COMP128 (1 << 2) + __u32 reserved; +}; + +/** + * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, + * struct vfio_region_info) + * + * Retrieve information about a device region. Caller provides + * struct vfio_region_info with index value set. Caller sets argsz. + * Implementation of region mapping is bus driver specific. This is + * intended to describe MMIO, I/O port, as well as bus specific + * regions (ex. PCI config space). 
Zero sized regions may be used + * to describe unimplemented regions (ex. unimplemented PCI BARs). + * Return: 0 on success, -errno on failure. + */ +struct vfio_region_info { + __u32 argsz; + __u32 flags; +#define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */ +#define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */ +#define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */ +#define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */ + __u32 index; /* Region index */ + __u32 cap_offset; /* Offset within info struct of first cap */ + __aligned_u64 size; /* Region size (bytes) */ + __aligned_u64 offset; /* Region offset from start of device fd */ +}; +#define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) + +/* + * The sparse mmap capability allows finer granularity of specifying areas + * within a region with mmap support. When specified, the user should only + * mmap the offset ranges specified by the areas array. mmaps outside of the + * areas specified may fail (such as the range covering a PCI MSI-X table) or + * may result in improper device behavior. + * + * The structures below define version 1 of this capability. + */ +#define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1 + +struct vfio_region_sparse_mmap_area { + __aligned_u64 offset; /* Offset of mmap'able area within region */ + __aligned_u64 size; /* Size of mmap'able area */ +}; + +struct vfio_region_info_cap_sparse_mmap { + struct vfio_info_cap_header header; + __u32 nr_areas; + __u32 reserved; + struct vfio_region_sparse_mmap_area areas[]; +}; + +/* + * The device specific type capability allows regions unique to a specific + * device or class of devices to be exposed. This helps solve the problem for + * vfio bus drivers of defining which region indexes correspond to which region + * on the device, without needing to resort to static indexes, as done by + * vfio-pci. For instance, if we were to go back in time, we might remove + * VFIO_PCI_VGA_REGION_INDEX and let vfio-pci simply define that all indexes + * greater than or equal to VFIO_PCI_NUM_REGIONS are device specific and we'd + * make a "VGA" device specific type to describe the VGA access space. This + * means that non-VGA devices wouldn't need to waste this index, and thus the + * address space associated with it due to implementation of device file + * descriptor offsets in vfio-pci. + * + * The current implementation is now part of the user ABI, so we can't use this + * for VGA, but there are other upcoming use cases, such as opregions for Intel + * IGD devices and framebuffers for vGPU devices. We missed VGA, but we'll + * use this for future additions. + * + * The structure below defines version 1 of this capability. + */ +#define VFIO_REGION_INFO_CAP_TYPE 2 + +struct vfio_region_info_cap_type { + struct vfio_info_cap_header header; + __u32 type; /* global per bus driver */ + __u32 subtype; /* type specific */ +}; + +/* + * List of region types, global per bus driver. + * If you introduce a new type, please add it here. 
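+ *
+ * For illustration, a hedged sketch of re-querying region info when the
+ * capability chain does not fit and then walking it for
+ * VFIO_REGION_INFO_CAP_SPARSE_MMAP ('device' is an assumed open device fd):
+ *
+ *	struct vfio_region_info *ri = malloc(sizeof(*ri));
+ *	ri->argsz = sizeof(*ri);
+ *	ri->index = VFIO_PCI_BAR0_REGION_INDEX;
+ *	ioctl(device, VFIO_DEVICE_GET_REGION_INFO, ri);
+ *	if (ri->argsz > sizeof(*ri)) {	/* caps present, re-query larger */
+ *		__u32 argsz = ri->argsz;
+ *		ri = realloc(ri, argsz);
+ *		memset(ri, 0, argsz);
+ *		ri->argsz = argsz;
+ *		ri->index = VFIO_PCI_BAR0_REGION_INDEX;
+ *		ioctl(device, VFIO_DEVICE_GET_REGION_INFO, ri);
+ *	}
+ *	if (ri->flags & VFIO_REGION_INFO_FLAG_CAPS) {
+ *		__u32 off = ri->cap_offset;
+ *		while (off) {
+ *			struct vfio_info_cap_header *h =
+ *				(void *)((char *)ri + off);
+ *			if (h->id == VFIO_REGION_INFO_CAP_SPARSE_MMAP)
+ *				/* cast h to vfio_region_info_cap_sparse_mmap */;
+ *			off = h->next;
+ *		}
+ *	}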
+ */
+
+/* PCI region type containing a PCI vendor part */
+#define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31)
+#define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
+#define VFIO_REGION_TYPE_GFX (1)
+#define VFIO_REGION_TYPE_CCW (2)
+#define VFIO_REGION_TYPE_MIGRATION_DEPRECATED (3)
+
+/* sub-types for VFIO_REGION_TYPE_PCI_* */
+
+/* 8086 vendor PCI sub-types */
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
+
+/* 10de vendor PCI sub-types */
+/*
+ * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
+ *
+ * Deprecated, region no longer provided
+ */
+#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1)
+
+/* 1014 vendor PCI sub-types */
+/*
+ * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
+ * to do TLB invalidation on a GPU.
+ *
+ * Deprecated, region no longer provided
+ */
+#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1)
+
+/* sub-types for VFIO_REGION_TYPE_GFX */
+#define VFIO_REGION_SUBTYPE_GFX_EDID (1)
+
+/**
+ * struct vfio_region_gfx_edid - EDID region layout.
+ *
+ * Set display link state and EDID blob.
+ *
+ * The EDID blob has monitor information such as brand, name, serial
+ * number, physical size, supported video modes and more.
+ *
+ * This special region allows userspace (typically qemu) to set a virtual
+ * EDID for the virtual monitor, which allows a flexible display
+ * configuration.
+ *
+ * For the EDID blob spec look here:
+ * https://en.wikipedia.org/wiki/Extended_Display_Identification_Data
+ *
+ * On Linux systems you can find the EDID blob in sysfs:
+ * /sys/class/drm/${card}/${connector}/edid
+ *
+ * You can use the edid-decode utility (comes with xorg-x11-utils) to
+ * decode the EDID blob.
+ *
+ * @edid_offset: location of the edid blob, relative to the
+ * start of the region (readonly).
+ * @edid_max_size: max size of the edid blob (readonly).
+ * @edid_size: actual edid size (read/write).
+ * @link_state: display link state (read/write).
+ * VFIO_DEVICE_GFX_LINK_STATE_UP: Monitor is turned on.
+ * VFIO_DEVICE_GFX_LINK_STATE_DOWN: Monitor is turned off.
+ * @max_xres: max display width (0 == no limitation, readonly).
+ * @max_yres: max display height (0 == no limitation, readonly).
+ *
+ * EDID update protocol:
+ * (1) set link-state to down.
+ * (2) update edid blob and size.
+ * (3) set link-state to up.
+ */
+struct vfio_region_gfx_edid {
+ __u32 edid_offset;
+ __u32 edid_max_size;
+ __u32 edid_size;
+ __u32 max_xres;
+ __u32 max_yres;
+ __u32 link_state;
+#define VFIO_DEVICE_GFX_LINK_STATE_UP 1
+#define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2
+};
+
+/* sub-types for VFIO_REGION_TYPE_CCW */
+#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1)
+#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2)
+#define VFIO_REGION_SUBTYPE_CCW_CRW (3)
+
+/* sub-types for VFIO_REGION_TYPE_MIGRATION */
+#define VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED (1)
+
+struct vfio_device_migration_info {
+ __u32 device_state; /* VFIO device state */
+#define VFIO_DEVICE_STATE_V1_STOP (0)
+#define VFIO_DEVICE_STATE_V1_RUNNING (1 << 0)
+#define VFIO_DEVICE_STATE_V1_SAVING (1 << 1)
+#define VFIO_DEVICE_STATE_V1_RESUMING (1 << 2)
+#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_V1_RUNNING | \
+ VFIO_DEVICE_STATE_V1_SAVING | \
+ VFIO_DEVICE_STATE_V1_RESUMING)
+
+#define VFIO_DEVICE_STATE_VALID(state) \
+ (state & VFIO_DEVICE_STATE_V1_RESUMING ? \
+ (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_V1_RESUMING : 1)
+
+#define VFIO_DEVICE_STATE_IS_ERROR(state) \
+ ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_V1_SAVING | \
+ VFIO_DEVICE_STATE_V1_RESUMING))
+
+#define VFIO_DEVICE_STATE_SET_ERROR(state) \
+ ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_V1_SAVING | \
+ VFIO_DEVICE_STATE_V1_RESUMING)
+
+ __u32 reserved;
+ __aligned_u64 pending_bytes;
+ __aligned_u64 data_offset;
+ __aligned_u64 data_size;
+};
+
+/*
+ * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
+ * which allows direct access to non-MSIX registers which happen to be within
+ * the same system page.
+ *
+ * Even though the userspace gets direct access to the MSIX data, the existing
+ * VFIO_DEVICE_SET_IRQS interface must still be used for MSIX configuration.
+ */
+#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3
+
+/*
+ * Capability with compressed real address (aka SSA - small system address)
+ * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing
+ * and by the userspace to associate a NVLink bridge with a GPU.
+ *
+ * Deprecated, capability no longer provided
+ */
+#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4
+
+struct vfio_region_info_cap_nvlink2_ssatgt {
+ struct vfio_info_cap_header header;
+ __aligned_u64 tgt;
+};
+
+/*
+ * Capability with an NVLink link speed. The value is read by
+ * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed"
+ * property in the device tree. The value is fixed in the hardware
+ * and failing to provide the correct value results in the link
+ * not working with no indication from the driver why.
+ *
+ * Deprecated, capability no longer provided
+ */
+#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5
+
+struct vfio_region_info_cap_nvlink2_lnkspd {
+ struct vfio_info_cap_header header;
+ __u32 link_speed;
+ __u32 __pad;
+};
+
+/**
+ * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
+ * struct vfio_irq_info)
+ *
+ * Retrieve information about a device IRQ. Caller provides
+ * struct vfio_irq_info with index value set. Caller sets argsz.
+ * Implementation of IRQ mapping is bus driver specific. Indexes
+ * using multiple IRQs are primarily intended to support MSI-like
+ * interrupt blocks. Zero count irq blocks may be used to describe
+ * unimplemented interrupt types.
+ *
+ * The EVENTFD flag indicates the interrupt index supports eventfd based
+ * signaling.
+ *
+ * The MASKABLE flag indicates the index supports MASK and UNMASK
+ * actions described below.
+ *
+ * AUTOMASKED indicates that after signaling, the interrupt line is
+ * automatically masked by VFIO and the user needs to unmask the line
+ * to receive new interrupts. This is primarily intended to distinguish
+ * level triggered interrupts.
+ *
+ * The NORESIZE flag indicates that the interrupt lines within the index
+ * are set up as a set and new subindexes cannot be enabled without first
+ * disabling the entire index. This is used for interrupts like PCI MSI
+ * and MSI-X where the driver may only use a subset of the available
+ * indexes, but VFIO needs to enable a specific number of vectors
+ * upfront. In the case of MSI-X, where the user can enable MSI-X and
+ * then add and unmask vectors, it's up to userspace to make the decision
+ * whether to allocate the maximum supported number of vectors or tear
+ * down setup and incrementally increase the vectors as each is enabled.
+ * Absence of the NORESIZE flag indicates that vectors can be enabled
+ * and disabled dynamically without impacting other vectors within the
+ * index.
+ */
+struct vfio_irq_info {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_IRQ_INFO_EVENTFD (1 << 0)
+#define VFIO_IRQ_INFO_MASKABLE (1 << 1)
+#define VFIO_IRQ_INFO_AUTOMASKED (1 << 2)
+#define VFIO_IRQ_INFO_NORESIZE (1 << 3)
+ __u32 index; /* IRQ index */
+ __u32 count; /* Number of IRQs within this index */
+};
+#define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9)
+
+/**
+ * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
+ *
+ * Set signaling, masking, and unmasking of interrupts. Caller provides
+ * struct vfio_irq_set with all fields set. 'start' and 'count' indicate
+ * the range of subindexes being specified.
+ *
+ * The DATA flags specify the type of data provided. If DATA_NONE, the
+ * operation performs the specified action immediately on the specified
+ * interrupt(s). For example, to unmask AUTOMASKED interrupt [0,0]:
+ * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1.
+ *
+ * DATA_BOOL allows sparse support for the same on arrays of interrupts.
+ * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]):
+ * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3,
+ * data = {1,0,1}
+ *
+ * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd.
+ * A value of -1 can be used to either de-assign interrupts if already
+ * assigned or skip un-assigned interrupts. For example, to set an eventfd
+ * to be triggered for interrupts [0,0] and [0,2]:
+ * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3,
+ * data = {fd1, -1, fd2}
+ * If index [0,1] is previously set, two count = 1 ioctl calls would be
+ * required to set [0,0] and [0,2] without changing [0,1].
+ *
+ * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used
+ * with ACTION_TRIGGER to perform kernel level interrupt loopback testing
+ * from userspace (ie. simulate hardware triggering).
+ *
+ * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER
+ * enables the interrupt index for the device. Individual subindex interrupts
+ * can be disabled using the -1 value for DATA_EVENTFD or the index can be
+ * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
+ *
+ * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while
+ * ACTION_TRIGGER specifies kernel->user signaling.
+ */
+struct vfio_irq_set {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_IRQ_SET_DATA_NONE (1 << 0) /* Data not present */
+#define VFIO_IRQ_SET_DATA_BOOL (1 << 1) /* Data is bool (u8) */
+#define VFIO_IRQ_SET_DATA_EVENTFD (1 << 2) /* Data is eventfd (s32) */
+#define VFIO_IRQ_SET_ACTION_MASK (1 << 3) /* Mask interrupt */
+#define VFIO_IRQ_SET_ACTION_UNMASK (1 << 4) /* Unmask interrupt */
+#define VFIO_IRQ_SET_ACTION_TRIGGER (1 << 5) /* Trigger interrupt */
+ __u32 index;
+ __u32 start;
+ __u32 count;
+ __u8 data[];
+};
+#define VFIO_DEVICE_SET_IRQS _IO(VFIO_TYPE, VFIO_BASE + 10)
+
+#define VFIO_IRQ_SET_DATA_TYPE_MASK (VFIO_IRQ_SET_DATA_NONE | \
+ VFIO_IRQ_SET_DATA_BOOL | \
+ VFIO_IRQ_SET_DATA_EVENTFD)
+#define VFIO_IRQ_SET_ACTION_TYPE_MASK (VFIO_IRQ_SET_ACTION_MASK | \
+ VFIO_IRQ_SET_ACTION_UNMASK | \
+ VFIO_IRQ_SET_ACTION_TRIGGER)
+/**
+ * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
+ *
+ * Reset a device.
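+ *
+ * As a hedged illustration of the variable-length vfio_irq_set layout above
+ * (not part of the ABI itself; 'device' and 'efd' are assumed to be an open
+ * device fd and an eventfd), binding one MSI vector and then resetting:
+ *
+ *	char buf[sizeof(struct vfio_irq_set) + sizeof(__s32)];
+ *	struct vfio_irq_set *set = (struct vfio_irq_set *)buf;
+ *	set->argsz = sizeof(buf);
+ *	set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ *	set->index = VFIO_PCI_MSI_IRQ_INDEX;
+ *	set->start = 0;
+ *	set->count = 1;
+ *	memcpy(set->data, &efd, sizeof(__s32));	/* eventfd in data[] */
+ *	ioctl(device, VFIO_DEVICE_SET_IRQS, set);
+ *	ioctl(device, VFIO_DEVICE_RESET);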
+ */
+#define VFIO_DEVICE_RESET _IO(VFIO_TYPE, VFIO_BASE + 11)
+
+/*
+ * The VFIO-PCI bus driver makes use of the following fixed region and
+ * IRQ index mapping. Unimplemented regions return a size of zero.
+ * Unimplemented IRQ types return a count of zero.
+ */
+
+enum {
+ VFIO_PCI_BAR0_REGION_INDEX,
+ VFIO_PCI_BAR1_REGION_INDEX,
+ VFIO_PCI_BAR2_REGION_INDEX,
+ VFIO_PCI_BAR3_REGION_INDEX,
+ VFIO_PCI_BAR4_REGION_INDEX,
+ VFIO_PCI_BAR5_REGION_INDEX,
+ VFIO_PCI_ROM_REGION_INDEX,
+ VFIO_PCI_CONFIG_REGION_INDEX,
+ /*
+ * Expose VGA regions defined for PCI base class 03, subclass 00.
+ * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df
+ * as well as the MMIO range 0xa0000 to 0xbffff. Each implemented
+ * range is found at its identity mapped offset from the region
+ * offset, for example 0x3b0 is region_info.offset + 0x3b0. Areas
+ * between described ranges are unimplemented.
+ */
+ VFIO_PCI_VGA_REGION_INDEX,
+ VFIO_PCI_NUM_REGIONS = 9 /* Fixed user ABI, region indexes >=9 use */
+ /* device specific cap to define content. */
+};
+
+enum {
+ VFIO_PCI_INTX_IRQ_INDEX,
+ VFIO_PCI_MSI_IRQ_INDEX,
+ VFIO_PCI_MSIX_IRQ_INDEX,
+ VFIO_PCI_ERR_IRQ_INDEX,
+ VFIO_PCI_REQ_IRQ_INDEX,
+ VFIO_PCI_NUM_IRQS
+};
+
+/*
+ * The vfio-ccw bus driver makes use of the following fixed region and
+ * IRQ index mapping. Unimplemented regions return a size of zero.
+ * Unimplemented IRQ types return a count of zero.
+ */
+
+enum {
+ VFIO_CCW_CONFIG_REGION_INDEX,
+ VFIO_CCW_NUM_REGIONS
+};
+
+enum {
+ VFIO_CCW_IO_IRQ_INDEX,
+ VFIO_CCW_CRW_IRQ_INDEX,
+ VFIO_CCW_REQ_IRQ_INDEX,
+ VFIO_CCW_NUM_IRQS
+};
+
+/*
+ * The vfio-ap bus driver makes use of the following IRQ index mapping.
+ * Unimplemented IRQ types return a count of zero.
+ */
+enum {
+ VFIO_AP_REQ_IRQ_INDEX,
+ VFIO_AP_CFG_CHG_IRQ_INDEX,
+ VFIO_AP_NUM_IRQS
+};
+
+/**
+ * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12,
+ * struct vfio_pci_hot_reset_info)
+ *
+ * This command is used to query the affected devices in the hot reset for
+ * a given device.
+ *
+ * This command always reports the segment, bus, and devfn information for
+ * each affected device, and selectively reports the group_id or devid
+ * depending on how the calling device is opened.
+ *
+ * - If the calling device is opened via the traditional group/container
+ * API, group_id is reported. The user should check whether it owns all
+ * the affected devices and provide a set of group fds to prove the
+ * ownership in the VFIO_DEVICE_PCI_HOT_RESET ioctl.
+ *
+ * - If the calling device is opened as a cdev, devid is reported.
+ * Flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set to indicate this
+ * data type. All the affected devices should be represented in
+ * the dev_set, ex. bound to a vfio driver, and also be owned by
+ * this interface which is determined by the following conditions:
+ * 1) Has a valid devid within the iommufd_ctx of the calling device.
+ * Ownership cannot be determined across separate iommufd_ctx and
+ * the cdev calling conventions do not support a proof-of-ownership
+ * model as provided in the legacy group interface. In this case
+ * a valid devid with value greater than zero is provided in the return
+ * structure.
+ * 2) Does not have a valid devid within the iommufd_ctx of the calling
+ * device, but belongs to the same IOMMU group as the calling device
+ * or another opened device that has a valid devid within the
+ * iommufd_ctx of the calling device. This provides implicit ownership
+ * for devices within the same DMA isolation context. In this case the
+ * devid value of VFIO_PCI_DEVID_OWNED is provided in the return
+ * structure.
+ *
+ * A devid value of VFIO_PCI_DEVID_NOT_OWNED is provided in the return
+ * structure for affected devices where the device is NOT represented in the
+ * dev_set or ownership is not available. Such devices prevent the use
+ * of the VFIO_DEVICE_PCI_HOT_RESET ioctl outside of the proof-of-ownership
+ * calling conventions (ie. via legacy group accessed devices). Flag
+ * VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED would be set when all the
+ * affected devices are represented in the dev_set and also owned by
+ * the user. This flag is available only when
+ * flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set, otherwise reserved.
+ * When set, the user can invoke VFIO_DEVICE_PCI_HOT_RESET with a zero
+ * length fd array on the calling device as the ownership is validated
+ * by iommufd_ctx.
+ *
+ * Return: 0 on success, -errno on failure:
+ * -enospc = insufficient buffer, -enodev = unsupported for device.
+ */
+struct vfio_pci_dependent_device {
+ union {
+ __u32 group_id;
+ __u32 devid;
+#define VFIO_PCI_DEVID_OWNED 0
+#define VFIO_PCI_DEVID_NOT_OWNED -1
+ };
+ __u16 segment;
+ __u8 bus;
+ __u8 devfn; /* Use PCI_SLOT/PCI_FUNC */
+};
+
+struct vfio_pci_hot_reset_info {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID (1 << 0)
+#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED (1 << 1)
+ __u32 count;
+ struct vfio_pci_dependent_device devices[];
+};
+
+#define VFIO_DEVICE_GET_PCI_HOT_RESET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+/**
+ * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
+ * struct vfio_pci_hot_reset)
+ *
+ * A PCI hot reset results in either a bus or slot reset which may affect
+ * other devices sharing the bus/slot. The calling user must have
+ * ownership of the full set of affected devices as determined by the
+ * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl.
+ *
+ * When called on a device file descriptor acquired through the vfio
+ * group interface, the user is required to provide proof of ownership
+ * of those affected devices via the group_fds array in struct
+ * vfio_pci_hot_reset.
+ *
+ * When called on a direct cdev opened vfio device, the flags field of
+ * struct vfio_pci_hot_reset_info reports the ownership status of the
+ * affected devices and this ioctl must be called with an empty group_fds
+ * array. See above INFO ioctl definition for ownership requirements.
+ *
+ * Mixed usage of legacy groups and cdevs across the set of affected
+ * devices is not supported.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_pci_hot_reset {
+ __u32 argsz;
+ __u32 flags;
+ __u32 count;
+ __s32 group_fds[];
+};
+
+#define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13)
+
+/**
+ * VFIO_DEVICE_QUERY_GFX_PLANE - _IOW(VFIO_TYPE, VFIO_BASE + 14,
+ * struct vfio_device_query_gfx_plane)
+ *
+ * Set the drm_plane_type and flags, then retrieve the gfx plane info.
+ *
+ * flags supported:
+ * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_DMABUF are set
+ * to ask if the mdev supports dma-buf. 0 on support, -EINVAL on no
+ * support for dma-buf.
+ * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_REGION are set
+ * to ask if the mdev supports region. 0 on support, -EINVAL on no
+ * support for region.
+ * - VFIO_GFX_PLANE_TYPE_DMABUF or VFIO_GFX_PLANE_TYPE_REGION is set
+ * with each call to query the plane info.
+ * - Others are invalid and return -EINVAL.
+ *
+ * Note:
+ * 1. The plane could be disabled by the guest. In that case, success will
+ * be returned with zero-initialized drm_format, size, width and height
+ * fields.
+ * 2. x_hot/y_hot is set to 0xFFFFFFFF if no hotspot information is
+ * available.
+ *
+ * Return: 0 on success, -errno on other failure.
+ */
+struct vfio_device_gfx_plane_info {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_GFX_PLANE_TYPE_PROBE (1 << 0)
+#define VFIO_GFX_PLANE_TYPE_DMABUF (1 << 1)
+#define VFIO_GFX_PLANE_TYPE_REGION (1 << 2)
+ /* in */
+ __u32 drm_plane_type; /* type of plane: DRM_PLANE_TYPE_* */
+ /* out */
+ __u32 drm_format; /* drm format of plane */
+ __aligned_u64 drm_format_mod; /* tiled mode */
+ __u32 width; /* width of plane */
+ __u32 height; /* height of plane */
+ __u32 stride; /* stride of plane */
+ __u32 size; /* size of plane in bytes, align on page */
+ __u32 x_pos; /* horizontal position of cursor plane */
+ __u32 y_pos; /* vertical position of cursor plane */
+ __u32 x_hot; /* horizontal position of cursor hotspot */
+ __u32 y_hot; /* vertical position of cursor hotspot */
+ union {
+ __u32 region_index; /* region index */
+ __u32 dmabuf_id; /* dma-buf id */
+ };
+ __u32 reserved;
+};
+
+#define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14)
+
+/**
+ * VFIO_DEVICE_GET_GFX_DMABUF - _IOW(VFIO_TYPE, VFIO_BASE + 15, __u32)
+ *
+ * Return a new dma-buf file descriptor for an exposed guest framebuffer
+ * described by the provided dmabuf_id. The dmabuf_id is returned from
+ * VFIO_DEVICE_QUERY_GFX_PLANE as a token of the exposed guest framebuffer.
+ */
+
+#define VFIO_DEVICE_GET_GFX_DMABUF _IO(VFIO_TYPE, VFIO_BASE + 15)
+
+/**
+ * VFIO_DEVICE_IOEVENTFD - _IOW(VFIO_TYPE, VFIO_BASE + 16,
+ * struct vfio_device_ioeventfd)
+ *
+ * Perform a write to the device at the specified device fd offset, with
+ * the specified data and width when the provided eventfd is triggered.
+ * vfio bus drivers may not support this for all regions, for all widths,
+ * or at all. vfio-pci currently only enables support for BAR regions,
+ * excluding the MSI-X vector table.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_ioeventfd {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_DEVICE_IOEVENTFD_8 (1 << 0) /* 1-byte write */
+#define VFIO_DEVICE_IOEVENTFD_16 (1 << 1) /* 2-byte write */
+#define VFIO_DEVICE_IOEVENTFD_32 (1 << 2) /* 4-byte write */
+#define VFIO_DEVICE_IOEVENTFD_64 (1 << 3) /* 8-byte write */
+#define VFIO_DEVICE_IOEVENTFD_SIZE_MASK (0xf)
+ __aligned_u64 offset; /* device fd offset of write */
+ __aligned_u64 data; /* data to be written */
+ __s32 fd; /* -1 for de-assignment */
+ __u32 reserved;
+};
+
+#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
+
+/**
+ * VFIO_DEVICE_FEATURE - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
+ * struct vfio_device_feature)
+ *
+ * Get, set, or probe feature data of the device. The feature is selected
+ * using the FEATURE_MASK portion of the flags field. Support for a feature
+ * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe
+ * may optionally include the GET and/or SET bits to determine read vs write
+ * access of the feature respectively. Probing a feature will return success
+ * if the feature is supported and all of the optionally indicated GET/SET
+ * methods are supported. The format of the data portion of the structure is
+ * specific to the given feature. The data portion is not required for
+ * probing. GET and SET are mutually exclusive, except for use with PROBE.
+ *
+ * Return 0 on success, -errno on failure.
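+ *
+ * A hedged usage sketch of the flags encoding (illustrative only; 'device'
+ * is assumed to be an open device fd), probing whether the migration state
+ * feature supports both GET and SET:
+ *
+ *	struct vfio_device_feature feat = {
+ *		.argsz = sizeof(feat),
+ *		.flags = VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE |
+ *			 VFIO_DEVICE_FEATURE_PROBE |
+ *			 VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET,
+ *	};
+ *	if (ioctl(device, VFIO_DEVICE_FEATURE, &feat) == 0)
+ *		/* feature supported with both GET and SET access */;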
+ */
+struct vfio_device_feature {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */
+#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */
+#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */
+#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */
+ __u8 data[];
+};
+
+#define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17)
+
+/*
+ * VFIO_DEVICE_BIND_IOMMUFD - _IOR(VFIO_TYPE, VFIO_BASE + 18,
+ * struct vfio_device_bind_iommufd)
+ * @argsz: User filled size of this data.
+ * @flags: Must be 0.
+ * @iommufd: iommufd to bind.
+ * @out_devid: The device id generated by this bind. devid is a handle for
+ * this device/iommufd bond and can be used in IOMMUFD commands.
+ *
+ * Bind a vfio_device to the specified iommufd.
+ *
+ * The user is restricted from accessing the device before the binding
+ * operation is completed. Only allowed on cdev fds.
+ *
+ * Unbind is automatically conducted when the device fd is closed.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_bind_iommufd {
+ __u32 argsz;
+ __u32 flags;
+ __s32 iommufd;
+ __u32 out_devid;
+};
+
+#define VFIO_DEVICE_BIND_IOMMUFD _IO(VFIO_TYPE, VFIO_BASE + 18)
+
+/*
+ * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19,
+ * struct vfio_device_attach_iommufd_pt)
+ * @argsz: User filled size of this data.
+ * @flags: Flags for attach.
+ * @pt_id: Input the target id which can represent an ioas or a hwpt
+ * allocated via the iommufd subsystem.
+ * Output the input ioas id or the attached hwpt id which could
+ * be the specified hwpt itself or a hwpt automatically created
+ * for the specified ioas by the kernel during the attachment.
+ * @pasid: The pasid to be attached, only meaningful when
+ * VFIO_DEVICE_ATTACH_PASID is set in @flags
+ *
+ * Associate the device with an address space within the bound iommufd.
+ * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. This is only
+ * allowed on cdev fds.
+ *
+ * If a vfio device or a pasid of this device is currently attached to a valid
+ * hw_pagetable (hwpt), without doing a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second
+ * VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl passing in another hwpt id is allowed.
+ * This action, also known as a hw_pagetable replacement, will replace the
+ * currently attached hwpt of the device or the pasid of this device with a new
+ * hwpt corresponding to the given pt_id.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_attach_iommufd_pt {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_DEVICE_ATTACH_PASID (1 << 0)
+ __u32 pt_id;
+ __u32 pasid;
+};
+
+#define VFIO_DEVICE_ATTACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 19)
+
+/*
+ * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20,
+ * struct vfio_device_detach_iommufd_pt)
+ * @argsz: User filled size of this data.
+ * @flags: Flags for detach.
+ * @pasid: The pasid to be detached, only meaningful when
+ * VFIO_DEVICE_DETACH_PASID is set in @flags
+ *
+ * Remove the association of the device or a pasid of the device and its
+ * current associated address space. Afterwards, the device or the pasid
+ * should be in a blocking DMA state. This is only allowed on cdev fds.
+ *
+ * Return: 0 on success, -errno on failure.
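+ *
+ * For illustration, a hedged sketch of the cdev flow these three ioctls
+ * form (assumptions: 'device' is an open /dev/vfio/devices/vfioX cdev fd,
+ * 'iommufd' an open /dev/iommu fd, and 'ioas_id' an IOAS already allocated
+ * through iommufd):
+ *
+ *	struct vfio_device_bind_iommufd bind = {
+ *		.argsz = sizeof(bind), .flags = 0, .iommufd = iommufd,
+ *	};
+ *	ioctl(device, VFIO_DEVICE_BIND_IOMMUFD, &bind);
+ *	struct vfio_device_attach_iommufd_pt attach = {
+ *		.argsz = sizeof(attach), .flags = 0, .pt_id = ioas_id,
+ *	};
+ *	ioctl(device, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach);
+ *	/* ... DMA mappings are managed through iommufd ... */
+ *	struct vfio_device_detach_iommufd_pt detach = {
+ *		.argsz = sizeof(detach), .flags = 0,
+ *	};
+ *	ioctl(device, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach);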
+ */ +struct vfio_device_detach_iommufd_pt { + __u32 argsz; + __u32 flags; +#define VFIO_DEVICE_DETACH_PASID (1 << 0) + __u32 pasid; +}; + +#define VFIO_DEVICE_DETACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 20) + +/* + * Provide support for setting a PCI VF Token, which is used as a shared + * secret between PF and VF drivers. This feature may only be set on a + * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing + * open VFs. Data provided when setting this feature is a 16-byte array + * (__u8 b[16]), representing a UUID. + */ +#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) + +/* + * Indicates the device can support the migration API through + * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. If this GET succeeds, the RUNNING and + * ERROR states are always supported. Support for additional states is + * indicated via the flags field; at least VFIO_MIGRATION_STOP_COPY must be + * set. + * + * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and + * RESUMING are supported. + * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P + * is supported in addition to the STOP_COPY states. + * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY means that + * PRE_COPY is supported in addition to the STOP_COPY states. + * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY + * means that RUNNING_P2P, PRE_COPY and PRE_COPY_P2P are supported + * in addition to the STOP_COPY states. + * + * Other combinations of flags have behavior to be defined in the future. + */ +struct vfio_device_feature_migration { + __aligned_u64 flags; +#define VFIO_MIGRATION_STOP_COPY (1 << 0) +#define VFIO_MIGRATION_P2P (1 << 1) +#define VFIO_MIGRATION_PRE_COPY (1 << 2) +}; +#define VFIO_DEVICE_FEATURE_MIGRATION 1 + +/* + * Upon VFIO_DEVICE_FEATURE_SET, execute a migration state change on the VFIO + * device. The new state is supplied in device_state, see enum + * vfio_device_mig_state for details + * + * The kernel migration driver must fully transition the device to the new state + * value before the operation returns to the user. + * + * The kernel migration driver must not generate asynchronous device state + * transitions outside of manipulation by the user or the VFIO_DEVICE_RESET + * ioctl as described above. + * + * If this function fails then current device_state may be the original + * operating state or some other state along the combination transition path. + * The user can then decide if it should execute a VFIO_DEVICE_RESET, attempt + * to return to the original state, or attempt to return to some other state + * such as RUNNING or STOP. + * + * If the new_state starts a new data transfer session then the FD associated + * with that session is returned in data_fd. The user is responsible to close + * this FD when it is finished. The user must consider the migration data stream + * carried over the FD to be opaque and must preserve the byte order of the + * stream. The user is not required to preserve buffer segmentation when writing + * the data stream during the RESUMING operation. + * + * Upon VFIO_DEVICE_FEATURE_GET, get the current migration state of the VFIO + * device, data_fd will be -1. + */ +struct vfio_device_feature_mig_state { + __u32 device_state; /* From enum vfio_device_mig_state */ + __s32 data_fd; +}; +#define VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE 2 + +/* + * The device migration Finite State Machine is described by the enum + * vfio_device_mig_state. 
Some of the FSM arcs will create a migration data + * transfer session by returning a FD, in this case the migration data will + * flow over the FD using read() and write() as discussed below. + * + * There are 5 states to support VFIO_MIGRATION_STOP_COPY: + * RUNNING - The device is running normally + * STOP - The device does not change the internal or external state + * STOP_COPY - The device internal state can be read out + * RESUMING - The device is stopped and is loading a new internal state + * ERROR - The device has failed and must be reset + * + * And optional states to support VFIO_MIGRATION_P2P: + * RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA + * And VFIO_MIGRATION_PRE_COPY: + * PRE_COPY - The device is running normally but tracking internal state + * changes + * And VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY: + * PRE_COPY_P2P - PRE_COPY, except the device cannot do peer to peer DMA + * + * The FSM takes actions on the arcs between FSM states. The driver implements + * the following behavior for the FSM arcs: + * + * RUNNING_P2P -> STOP + * STOP_COPY -> STOP + * While in STOP the device must stop the operation of the device. The device + * must not generate interrupts, DMA, or any other change to external state. + * It must not change its internal state. When stopped the device and kernel + * migration driver must accept and respond to interaction to support external + * subsystems in the STOP state, for example PCI MSI-X and PCI config space. + * Failure by the user to restrict device access while in STOP must not result + * in error conditions outside the user context (ex. host system faults). + * + * The STOP_COPY arc will terminate a data transfer session. + * + * RESUMING -> STOP + * Leaving RESUMING terminates a data transfer session and indicates the + * device should complete processing of the data delivered by write(). The + * kernel migration driver should complete the incorporation of data written + * to the data transfer FD into the device internal state and perform + * final validity and consistency checking of the new device state. If the + * user provided data is found to be incomplete, inconsistent, or otherwise + * invalid, the migration driver must fail the SET_STATE ioctl and + * optionally go to the ERROR state as described below. + * + * While in STOP the device has the same behavior as other STOP states + * described above. + * + * To abort a RESUMING session the device must be reset. + * + * PRE_COPY -> RUNNING + * RUNNING_P2P -> RUNNING + * While in RUNNING the device is fully operational, the device may generate + * interrupts, DMA, respond to MMIO, all vfio device regions are functional, + * and the device may advance its internal state. + * + * The PRE_COPY arc will terminate a data transfer session. + * + * PRE_COPY_P2P -> RUNNING_P2P + * RUNNING -> RUNNING_P2P + * STOP -> RUNNING_P2P + * While in RUNNING_P2P the device is partially running in the P2P quiescent + * state defined below. + * + * The PRE_COPY_P2P arc will terminate a data transfer session. + * + * RUNNING -> PRE_COPY + * RUNNING_P2P -> PRE_COPY_P2P + * STOP -> STOP_COPY + * PRE_COPY, PRE_COPY_P2P and STOP_COPY form the "saving group" of states + * which share a data transfer session. Moving between these states alters + * what is streamed in session, but does not terminate or otherwise affect + * the associated fd. + * + * These arcs begin the process of saving the device state and will return a + * new data_fd. 
+ * The migration driver may perform actions such as enabling
+ * dirty logging of device state when entering PRE_COPY or PRE_COPY_P2P.
+ *
+ * Each arc does not change the device operation, the device remains
+ * RUNNING, P2P quiesced or in STOP. The STOP_COPY state is described below
+ * in PRE_COPY_P2P -> STOP_COPY.
+ *
+ * PRE_COPY -> PRE_COPY_P2P
+ * Entering PRE_COPY_P2P continues all the behaviors of PRE_COPY above.
+ * However, while in the PRE_COPY_P2P state, the device is partially running
+ * in the P2P quiescent state defined below, like RUNNING_P2P.
+ *
+ * PRE_COPY_P2P -> PRE_COPY
+ * This arc allows returning the device to a full RUNNING behavior while
+ * continuing all the behaviors of PRE_COPY.
+ *
+ * PRE_COPY_P2P -> STOP_COPY
+ * While in the STOP_COPY state the device has the same behavior as STOP
+ * with the addition that the data transfer session continues to stream the
+ * migration state. End of stream on the FD indicates the entire device
+ * state has been transferred.
+ *
+ * The user should take steps to restrict access to vfio device regions while
+ * the device is in STOP_COPY or risk corruption of the device migration data
+ * stream.
+ *
+ * STOP -> RESUMING
+ * Entering the RESUMING state starts a process of restoring the device state
+ * and will return a new data_fd. The data stream fed into the data_fd should
+ * be taken from the data transfer output of a single FD during saving from
+ * a compatible device. The migration driver may alter/reset the internal
+ * device state for this arc if required to prepare the device to receive the
+ * migration data.
+ *
+ * STOP_COPY -> PRE_COPY
+ * STOP_COPY -> PRE_COPY_P2P
+ * These arcs are not permitted and return error if requested. Future
+ * revisions of this API may define behaviors for these arcs, in this case
+ * support will be discoverable by a new flag in
+ * VFIO_DEVICE_FEATURE_MIGRATION.
+ *
+ * any -> ERROR
+ * ERROR cannot be specified as a device state, however any transition request
+ * can be failed with an errno return and may then move the device_state into
+ * ERROR. In this case the device was unable to execute the requested arc and
+ * was also unable to restore the device to any valid device_state.
+ * To recover from ERROR VFIO_DEVICE_RESET must be used to return the
+ * device_state back to RUNNING.
+ *
+ * The optional peer to peer (P2P) quiescent state is intended to be a
+ * quiescent state for the device for the purposes of managing multiple
+ * devices within a user context where peer-to-peer DMA between devices may
+ * be active. The RUNNING_P2P and PRE_COPY_P2P states must prevent the device
+ * from initiating any new P2P DMA transactions. If the device can identify
+ * P2P transactions then it can stop only P2P DMA, otherwise it must stop all
+ * DMA. The migration driver must complete any such outstanding operations
+ * prior to completing the FSM arc into a P2P state. For the purpose of
+ * specification the states behave as though the device was fully running if
+ * not supported. Like while in STOP or STOP_COPY the user must not touch the
+ * device, otherwise the state can be exited.
+ *
+ * The remaining possible transitions are interpreted as combinations of the
+ * above FSM arcs. As there are multiple paths through the FSM arcs the path
+ * should be selected based on the following rules:
+ * - Select the shortest path.
+ * - The path cannot have saving group states as interior arcs, only
+ * starting/end states.
+ * Refer to vfio_mig_get_next_state() for the result of the algorithm.
+ *
+ * The automatic transit through the FSM arcs that make up the combination
+ * transition is invisible to the user. When working with combination arcs the
+ * user may see any step along the path in the device_state if SET_STATE
+ * fails. When handling these types of errors users should anticipate future
+ * revisions of this protocol using new states and those states becoming
+ * visible in this case.
+ *
+ * The optional states cannot be used with SET_STATE if the device does not
+ * support them. The user can discover if these states are supported by using
+ * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user
+ * can avoid knowing about these optional states if the kernel driver supports
+ * them.
+ *
+ * Arcs touching PRE_COPY and PRE_COPY_P2P are removed if support for PRE_COPY
+ * is not present.
+ */
+enum vfio_device_mig_state {
+ VFIO_DEVICE_STATE_ERROR = 0,
+ VFIO_DEVICE_STATE_STOP = 1,
+ VFIO_DEVICE_STATE_RUNNING = 2,
+ VFIO_DEVICE_STATE_STOP_COPY = 3,
+ VFIO_DEVICE_STATE_RESUMING = 4,
+ VFIO_DEVICE_STATE_RUNNING_P2P = 5,
+ VFIO_DEVICE_STATE_PRE_COPY = 6,
+ VFIO_DEVICE_STATE_PRE_COPY_P2P = 7,
+ VFIO_DEVICE_STATE_NR,
+};
+
+/**
+ * VFIO_MIG_GET_PRECOPY_INFO - _IO(VFIO_TYPE, VFIO_BASE + 21)
+ *
+ * This ioctl is used on the migration data FD in the precopy phase of the
+ * migration data transfer. It returns an estimate of the current data sizes
+ * remaining to be transferred. It allows the user to judge when it is
+ * appropriate to leave PRE_COPY for STOP_COPY.
+ *
+ * This ioctl is valid only in PRE_COPY states and the kernel driver should
+ * return -EINVAL from any other migration state.
+ *
+ * The vfio_precopy_info data structure returned by this ioctl provides
+ * estimates of data available from the device during the PRE_COPY states.
+ * This estimate is split into two categories, initial_bytes and
+ * dirty_bytes.
+ *
+ * The initial_bytes field indicates the amount of initial precopy
+ * data available from the device. This field should have a non-zero initial
+ * value and decrease as migration data is read from the device.
+ * It is recommended to leave PRE_COPY for STOP_COPY only after this field
+ * reaches zero. Leaving PRE_COPY earlier might make things slower.
+ *
+ * The dirty_bytes field tracks device state changes relative to data
+ * previously retrieved. This field starts at zero and may increase as
+ * the internal device state is modified or decrease as that modified
+ * state is read from the device.
+ *
+ * Userspace may use the combination of these fields to estimate the
+ * potential data size available during the PRE_COPY phases, as well as
+ * trends relative to the rate the device is dirtying its internal
+ * state, but these fields are not required to have any bearing relative
+ * to the data size available during the STOP_COPY phase.
+ *
+ * Drivers have a lot of flexibility in when and what they transfer during the
+ * PRE_COPY phase, and how they report this from VFIO_MIG_GET_PRECOPY_INFO.
+ *
+ * During pre-copy the migration data FD has a temporary "end of stream" that
+ * is reached when both initial_bytes and dirty_bytes are zero. For instance,
+ * this may indicate that the device is idle and not currently dirtying any
+ * internal state. When read() is done on this temporary end of stream the
+ * kernel driver should return ENOMSG from read(). Userspace can wait for
+ * more data (which may never come) by using poll.
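+ *
+ * A hedged sketch of a PRE_COPY drain loop (illustrative only; 'mig_fd' is
+ * assumed to be the data_fd returned when entering PRE_COPY):
+ *
+ *	struct vfio_precopy_info pre = { .argsz = sizeof(pre) };
+ *	for (;;) {
+ *		ioctl(mig_fd, VFIO_MIG_GET_PRECOPY_INFO, &pre);
+ *		if (!pre.initial_bytes && !pre.dirty_bytes)
+ *			break;	/* idle enough; move to STOP_COPY */
+ *		char buf[4096];
+ *		ssize_t n = read(mig_fd, buf, sizeof(buf));
+ *		if (n < 0 && errno == ENOMSG)
+ *			continue;	/* temporary end of stream; poll/retry */
+ *		/* forward 'n' bytes to the destination here */
+ *	}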
+ * + * Once in STOP_COPY the migration data FD has a permanent end of stream + * signaled in the usual way by read() always returning 0 and poll always + * returning readable. ENOMSG may not be returned in STOP_COPY. + * Support for this ioctl is mandatory if a driver claims to support + * VFIO_MIGRATION_PRE_COPY. + * + * Return: 0 on success, -1 and errno set on failure. + */ +struct vfio_precopy_info { + __u32 argsz; + __u32 flags; + __aligned_u64 initial_bytes; + __aligned_u64 dirty_bytes; +}; + +#define VFIO_MIG_GET_PRECOPY_INFO _IO(VFIO_TYPE, VFIO_BASE + 21) + +/* + * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power + * state with the platform-based power management. Device use of lower power + * states depends on factors managed by the runtime power management core, + * including system level support and coordinating support among dependent + * devices. Enabling device low power entry does not guarantee lower power + * usage by the device, nor is a mechanism provided through this feature to + * know the current power state of the device. If any device access happens + * (either from the host or through the vfio uAPI) when the device is in the + * low power state, then the host will move the device out of the low power + * state as necessary prior to the access. Once the access is completed, the + * device may re-enter the low power state. For single shot low power support + * with wake-up notification, see + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd + * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after + * calling LOW_POWER_EXIT. + */ +#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3 + +/* + * This device feature has the same behavior as + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user + * provides an eventfd for wake-up notification. When the device moves out of + * the low power state for the wake-up, the host will not allow the device to + * re-enter a low power state without a subsequent user call to one of the low + * power entry device feature IOCTLs. Access to mmap'd device regions is + * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the + * low power exit. The low power exit can happen either through LOW_POWER_EXIT + * or through any other access (where the wake-up notification has been + * generated). The access to mmap'd device regions will not trigger low power + * exit. + * + * The notification through the provided eventfd will be generated only when + * the device has entered and is resumed from a low power state after + * calling this device feature IOCTL. A device that has not entered low power + * state, as managed through the runtime power management core, will not + * generate a notification through the provided eventfd on access. Calling the + * LOW_POWER_EXIT feature is optional in the case where notification has been + * signaled on the provided eventfd that a resume from low power has occurred. + */ +struct vfio_device_low_power_entry_with_wakeup { + __s32 wakeup_eventfd; + __u32 reserved; +}; + +#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4 + +/* + * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as + * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features. + * This device feature IOCTL may itself generate a wakeup eventfd notification + * in the latter case if the device had previously entered a low power state. 
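+ *
+ * For illustration, a hedged sketch of arming low power entry with a wakeup
+ * eventfd via the FEATURE_SET encoding described earlier (assumptions:
+ * 'device' is an open device fd and 'efd' an eventfd; the payload follows
+ * struct vfio_device_feature in data[]):
+ *
+ *	char buf[sizeof(struct vfio_device_feature) +
+ *		 sizeof(struct vfio_device_low_power_entry_with_wakeup)];
+ *	struct vfio_device_feature *f = (struct vfio_device_feature *)buf;
+ *	struct vfio_device_low_power_entry_with_wakeup *w =
+ *		(struct vfio_device_low_power_entry_with_wakeup *)f->data;
+ *	f->argsz = sizeof(buf);
+ *	f->flags = VFIO_DEVICE_FEATURE_SET |
+ *		   VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP;
+ *	w->wakeup_eventfd = efd;
+ *	w->reserved = 0;
+ *	ioctl(device, VFIO_DEVICE_FEATURE, f);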
+ */
+#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging.
+ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports
+ * DMA logging.
+ *
+ * DMA logging allows a device to internally record what DMAs the device is
+ * initiating and report them back to userspace. It is part of the VFIO
+ * migration infrastructure that allows implementing dirty page tracking
+ * during the pre copy phase of live migration. Only DMA WRITEs are logged,
+ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE.
+ *
+ * When DMA logging is started a range of IOVAs to monitor is provided and the
+ * device can optimize its logging to cover only the IOVA range given. Each
+ * DMA that the device initiates inside the range will be logged by the device
+ * for later retrieval.
+ *
+ * page_size is an input that hints what tracking granularity the device
+ * should try to achieve. If the device cannot do the hinted page size then
+ * it's the driver choice which page size to pick based on its support.
+ * On output the device will return the page size it selected.
+ *
+ * ranges is a pointer to an array of
+ * struct vfio_device_feature_dma_logging_range.
+ *
+ * The core kernel code guarantees to support at minimum a num_ranges that
+ * fits into a single kernel page. User space can try higher values but
+ * should give up if that cannot be achieved due to driver limitations.
+ *
+ * A single call to start device DMA logging can be issued and a matching stop
+ * should follow at the end. Another start is not allowed in the meantime.
+ */
+struct vfio_device_feature_dma_logging_control {
+ __aligned_u64 page_size;
+ __u32 num_ranges;
+ __u32 __reserved;
+ __aligned_u64 ranges;
+};
+
+struct vfio_device_feature_dma_logging_range {
+ __aligned_u64 iova;
+ __aligned_u64 length;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started
+ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START
+ */
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log
+ *
+ * Query the device's DMA log for written pages within the given IOVA range.
+ * During querying the log is cleared for the IOVA range.
+ *
+ * bitmap is a pointer to an array of u64s that will hold the output bitmap
+ * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits
+ * is given by:
+ * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64))
+ *
+ * The input page_size can be any power of two value and does not have to
+ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver
+ * will format its internal logging to match the reporting page size, possibly
+ * by replicating bits if the internal page size is lower than requested.
+ *
+ * The LOGGING_REPORT will only set bits in the bitmap and never clear or
+ * perform any initialization of the user provided bitmap.
+ *
+ * If any error is returned userspace should assume that the dirty log is
+ * corrupted. Error recovery is to consider all memory dirty and try to
+ * restart the dirty tracking, or to abort/restart the whole migration.
+ *
+ * If DMA logging is not enabled, an error will be returned.
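+ *
+ * A hedged sketch of one report query (illustrative only; 'device' is an
+ * open device fd and IOVA_BASE/IOVA_LEN/PAGE_SZ are assumed constants; the
+ * payload is carried after struct vfio_device_feature as described above):
+ *
+ *	__u64 bitmap[IOVA_LEN / PAGE_SZ / 64] = { 0 };	/* user pre-zeroes */
+ *	char buf[sizeof(struct vfio_device_feature) +
+ *		 sizeof(struct vfio_device_feature_dma_logging_report)];
+ *	struct vfio_device_feature *f = (struct vfio_device_feature *)buf;
+ *	struct vfio_device_feature_dma_logging_report *r =
+ *		(struct vfio_device_feature_dma_logging_report *)f->data;
+ *	f->argsz = sizeof(buf);
+ *	f->flags = VFIO_DEVICE_FEATURE_GET |
+ *		   VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
+ *	r->iova = IOVA_BASE;
+ *	r->length = IOVA_LEN;
+ *	r->page_size = PAGE_SZ;
+ *	r->bitmap = (__u64)(unsigned long)bitmap;
+ *	ioctl(device, VFIO_DEVICE_FEATURE, f);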
+ *
+ */
+struct vfio_device_feature_dma_logging_report {
+ __aligned_u64 iova;
+ __aligned_u64 length;
+ __aligned_u64 page_size;
+ __aligned_u64 bitmap;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_GET read back the estimated data length that will
+ * be required to complete stop copy.
+ *
+ * Note: Can be called on each device state.
+ */
+
+struct vfio_device_feature_mig_data_size {
+ __aligned_u64 stop_copy_length;
+};
+
+#define VFIO_DEVICE_FEATURE_MIG_DATA_SIZE 9
+
+/**
+ * Upon VFIO_DEVICE_FEATURE_SET, set or clear the BUS mastering for the device
+ * based on the operation specified in the op flag.
+ *
+ * This functionality is incorporated for devices that need bus master
+ * control, but whose in-band device interface lacks the support.
+ * Consequently, it is not applicable to PCI devices, as bus master control
+ * for PCI devices is managed in-band through the configuration space. At
+ * present, this feature is supported only for CDX devices.
+ * When the device's BUS MASTER setting is configured as CLEAR, it will result
+ * in blocking all incoming DMA requests from the device. On the other hand,
+ * configuring the device's BUS MASTER setting as SET (enable) will grant the
+ * device the capability to perform DMA to the host memory.
+ */
+struct vfio_device_feature_bus_master {
+ __u32 op;
+#define VFIO_DEVICE_FEATURE_CLEAR_MASTER 0 /* Clear Bus Master */
+#define VFIO_DEVICE_FEATURE_SET_MASTER 1 /* Set Bus Master */
+};
+#define VFIO_DEVICE_FEATURE_BUS_MASTER 10
+
+/* -------- API for Type1 VFIO IOMMU -------- */
+
+/**
+ * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12, struct vfio_iommu_info)
+ *
+ * Retrieve information about the IOMMU object. Fills in provided
+ * struct vfio_iommu_info. Caller sets argsz.
+ *
+ * XXX Should we do these by CHECK_EXTENSION too?
+ */
+struct vfio_iommu_type1_info {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */
+#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */
+ __aligned_u64 iova_pgsizes; /* Bitmap of supported page sizes */
+ __u32 cap_offset; /* Offset within info struct of first cap */
+ __u32 pad;
+};
+
+/*
+ * The IOVA capability allows reporting the valid IOVA range(s)
+ * excluding any non-relaxable reserved regions exposed by
+ * devices attached to the container. Any DMA map attempt
+ * outside the valid iova range will return error.
+ *
+ * The structures below define version 1 of this capability.
+ */
+#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1
+
+struct vfio_iova_range {
+ __u64 start;
+ __u64 end;
+};
+
+struct vfio_iommu_type1_info_cap_iova_range {
+ struct vfio_info_cap_header header;
+ __u32 nr_iovas;
+ __u32 reserved;
+ struct vfio_iova_range iova_ranges[];
+};
+
+/*
+ * The migration capability allows reporting supported features for migration.
+ *
+ * The structures below define version 1 of this capability.
+ *
+ * The existence of this capability indicates that the IOMMU kernel driver
+ * supports dirty page logging.
+ *
+ * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for
+ * dirty page logging.
+ * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap
+ * size in bytes that can be used by user applications when getting the dirty
+ * bitmap.
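+ *
+ * As an illustrative aside, a hedged sketch of walking the type1 info
+ * capability chain ('info' is assumed to point at an argsz-sized buffer
+ * already filled by VFIO_IOMMU_GET_INFO on the container fd):
+ *
+ *	if (info->flags & VFIO_IOMMU_INFO_CAPS) {
+ *		__u32 off = info->cap_offset;
+ *		while (off) {
+ *			struct vfio_info_cap_header *h =
+ *				(void *)((char *)info + off);
+ *			if (h->id == VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION)
+ *				/* cast h to the migration cap struct */;
+ *			off = h->next;
+ *		}
+ *	}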
+ */
+#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 2
+
+struct vfio_iommu_type1_info_cap_migration {
+ struct vfio_info_cap_header header;
+ __u32 flags;
+ __u64 pgsize_bitmap;
+ __u64 max_dirty_bitmap_size; /* in bytes */
+};
+
+/*
+ * The DMA available capability allows reporting the current number of
+ * simultaneously outstanding DMA mappings that are allowed.
+ *
+ * The structure below defines version 1 of this capability.
+ *
+ * avail: specifies the current number of outstanding DMA mappings allowed.
+ */
+#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3
+
+struct vfio_iommu_type1_info_dma_avail {
+ struct vfio_info_cap_header header;
+ __u32 avail;
+};
+
+#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+/**
+ * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13, struct vfio_dma_map)
+ *
+ * Map process virtual addresses to IO virtual addresses using the
+ * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
+ *
+ * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova. The
+ * vaddr must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR.
+ * To maintain memory consistency within the user application, the updated
+ * vaddr must address the same memory object as originally mapped. Failure to
+ * do so will result in user memory corruption and/or device misbehavior. iova
+ * and size must match those in the original MAP_DMA call. Protection is not
+ * changed, and the READ & WRITE flags must be 0.
+ */
+struct vfio_iommu_type1_dma_map {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
+#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
+#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2)
+ __u64 vaddr; /* Process virtual address */
+ __u64 iova; /* IO virtual address */
+ __u64 size; /* Size of mapping (bytes) */
+};
+
+#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
+
+struct vfio_bitmap {
+ __u64 pgsize; /* page size for bitmap in bytes */
+ __u64 size; /* in bytes */
+ __u64 *data; /* one bit per page */
+};
+
+/**
+ * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
+ * struct vfio_dma_unmap)
+ *
+ * Unmap IO virtual addresses using the provided struct vfio_dma_unmap.
+ * Caller sets argsz. The actual unmapped size is returned in the size
+ * field. No guarantee is made to the user that arbitrary unmaps of iova
+ * or size different from those used in the original mapping call will
+ * succeed.
+ *
+ * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
+ * before unmapping IO virtual addresses. When this flag is set, the user must
+ * provide a struct vfio_bitmap in data[]. The user must provide zero-allocated
+ * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field.
+ * A bit in the bitmap represents one page, of the user provided page size in
+ * the vfio_bitmap.pgsize field, consecutively starting from iova offset. A set
+ * bit indicates that the page at that offset from iova is dirty. A bitmap of
+ * the pages in the range of unmapped size is returned in the user-provided
+ * vfio_bitmap.data.
+ *
+ * If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses. iova and size
+ * must be 0. This cannot be combined with the get-dirty-bitmap flag.
+ *
+ * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host
+ * virtual addresses in the iova range. DMA to already-mapped pages continues.
+ * Groups may not be added to the container while any addresses are invalid.
+ * This cannot be combined with the get-dirty-bitmap flag. + */ +struct vfio_iommu_type1_dma_unmap { + __u32 argsz; + __u32 flags; +#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) +#define VFIO_DMA_UNMAP_FLAG_ALL (1 << 1) +#define VFIO_DMA_UNMAP_FLAG_VADDR (1 << 2) + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of mapping (bytes) */ + __u8 data[]; +}; + +#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) + +/* + * IOCTLs to enable/disable IOMMU container usage. + * No parameters are supported. + */ +#define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) +#define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) + +/** + * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17, + * struct vfio_iommu_type1_dirty_bitmap) + * IOCTL is used for dirty pages logging. + * Caller should set flag depending on which operation to perform, details as + * below: + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs + * the IOMMU driver to log pages that are dirtied or potentially dirtied by + * the device; designed to be used when a migration is in progress. Dirty pages + * are logged until logging is disabled by user application by calling the IOCTL + * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag. + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs + * the IOMMU driver to stop logging dirtied pages. + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set + * returns the dirty pages bitmap for IOMMU container for a given IOVA range. + * The user must specify the IOVA range and the pgsize through the structure + * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface + * supports getting a bitmap of the smallest supported pgsize only and can be + * modified in future to get a bitmap of any specified supported pgsize. The + * user must provide a zeroed memory area for the bitmap memory and specify its + * size in bitmap.size. One bit is used to represent one page consecutively + * starting from iova offset. The user should provide page size in bitmap.pgsize + * field. A bit set in the bitmap indicates that the page at that offset from + * iova is dirty. The caller must set argsz to a value including the size of + * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the + * actual bitmap. If dirty pages logging is not enabled, an error will be + * returned. + * + * Only one of the flags _START, _STOP and _GET may be specified at a time. + * + */ +struct vfio_iommu_type1_dirty_bitmap { + __u32 argsz; + __u32 flags; +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) + __u8 data[]; +}; + +struct vfio_iommu_type1_dirty_bitmap_get { + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of iova range */ + struct vfio_bitmap bitmap; +}; + +#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) + +/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ + +/* + * The SPAPR TCE DDW info struct provides the information about + * the details of Dynamic DMA window capability. + * + * @pgsizes contains a page size bitmask, 4K/64K/16M are supported. + * @max_dynamic_windows_supported tells the maximum number of windows + * which the platform can create. 
+
+/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
+
+/*
+ * The SPAPR TCE DDW info struct provides the information about
+ * the details of Dynamic DMA window capability.
+ *
+ * @pgsizes contains a page size bitmask, 4K/64K/16M are supported.
+ * @max_dynamic_windows_supported tells the maximum number of windows
+ * which the platform can create.
+ * @levels tells the maximum number of levels in multi-level IOMMU tables;
+ * this allows splitting a table into smaller chunks which reduces
+ * the amount of physically contiguous memory required for the table.
+ */
+struct vfio_iommu_spapr_tce_ddw_info {
+ __u64 pgsizes; /* Bitmap of supported page sizes */
+ __u32 max_dynamic_windows_supported;
+ __u32 levels;
+};
+
+/*
+ * The SPAPR TCE info struct provides the information about the PCI bus
+ * address ranges available for DMA; these values are programmed into
+ * the hardware so the guest has to know that information.
+ *
+ * The DMA 32 bit window start is an absolute PCI bus address.
+ * The IOVA addresses passed via map/unmap ioctls are absolute PCI bus
+ * addresses too, so the window works as a filter rather than an offset
+ * for IOVA addresses.
+ *
+ * Flags supported:
+ * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows
+ *   (DDW) support is present. @ddw is only supported when DDW is present.
+ */
+struct vfio_iommu_spapr_tce_info {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_IOMMU_SPAPR_INFO_DDW (1 << 0) /* DDW supported */
+ __u32 dma32_window_start; /* 32 bit window start (bytes) */
+ __u32 dma32_window_size; /* 32 bit window size (bytes) */
+ struct vfio_iommu_spapr_tce_ddw_info ddw;
+};
+
+#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+/*
+ * EEH PE operation struct provides ways to:
+ * - enable/disable EEH functionality;
+ * - unfreeze IO/DMA for frozen PE;
+ * - read PE state;
+ * - reset PE;
+ * - configure PE;
+ * - inject EEH error.
+ */
+struct vfio_eeh_pe_err {
+ __u32 type;
+ __u32 func;
+ __u64 addr;
+ __u64 mask;
+};
+
+struct vfio_eeh_pe_op {
+ __u32 argsz;
+ __u32 flags;
+ __u32 op;
+ union {
+ struct vfio_eeh_pe_err err;
+ };
+};
+
+#define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */
+#define VFIO_EEH_PE_ENABLE 1 /* Enable EEH functionality */
+#define VFIO_EEH_PE_UNFREEZE_IO 2 /* Enable IO for frozen PE */
+#define VFIO_EEH_PE_UNFREEZE_DMA 3 /* Enable DMA for frozen PE */
+#define VFIO_EEH_PE_GET_STATE 4 /* PE state retrieval */
+#define VFIO_EEH_PE_STATE_NORMAL 0 /* PE in functional state */
+#define VFIO_EEH_PE_STATE_RESET 1 /* PE reset in progress */
+#define VFIO_EEH_PE_STATE_STOPPED 2 /* Stopped DMA and IO */
+#define VFIO_EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */
+#define VFIO_EEH_PE_STATE_UNAVAIL 5 /* State unavailable */
+#define VFIO_EEH_PE_RESET_DEACTIVATE 5 /* Deassert PE reset */
+#define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */
+#define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */
+#define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */
+#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */
+
+#define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21)
+
+/**
+ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory)
+ *
+ * Registers user space memory where DMA is allowed. It pins
+ * user pages and does the locked memory accounting so
+ * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls
+ * get faster.
+ */
+struct vfio_iommu_spapr_register_memory {
+ __u32 argsz;
+ __u32 flags;
+ __u64 vaddr; /* Process virtual address */
+ __u64 size; /* Size of mapping (bytes) */
+};
+#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17)
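+
+/*
+ * Illustrative flow (not part of the kernel ABI text): a user typically
+ * registers each DMA-able region once up front, performs MAP_DMA/UNMAP_DMA
+ * against it at runtime, and unregisters the region at teardown.
+ */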
+
+/**
+ * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory)
+ *
+ * Unregisters user space memory registered with
+ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY.
+ * Uses vfio_iommu_spapr_register_memory for parameters.
+ */
+#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18)
+
+/**
+ * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create)
+ *
+ * Creates an additional TCE table and programs it (sets a new DMA window)
+ * to every IOMMU group in the container. It receives page shift, window
+ * size and number of levels in the TCE table being created.
+ *
+ * It allocates and returns an offset on a PCI bus of the new DMA window.
+ */
+struct vfio_iommu_spapr_tce_create {
+ __u32 argsz;
+ __u32 flags;
+ /* in */
+ __u32 page_shift;
+ __u32 __resv1;
+ __u64 window_size;
+ __u32 levels;
+ __u32 __resv2;
+ /* out */
+ __u64 start_addr;
+};
+#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19)
+
+/**
+ * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove)
+ *
+ * Unprograms a TCE table from all groups in the container and destroys it.
+ * It receives a PCI bus offset as a window id.
+ */
+struct vfio_iommu_spapr_tce_remove {
+ __u32 argsz;
+ __u32 flags;
+ /* in */
+ __u64 start_addr;
+};
+#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20)
+
+/* ***************************************************************** */
+
+#endif /* VFIO_H */
diff --git a/kernel/linux/uapi/version b/kernel/linux/uapi/version
index 3c68968f923..966a9983019 100644
--- a/kernel/linux/uapi/version
+++ b/kernel/linux/uapi/version
@@ -1 +1 @@
-v6.14
+v6.16
diff --git a/lib/bbdev/meson.build b/lib/bbdev/meson.build
index 2e48d5f3da3..002fc3f1ac7 100644
--- a/lib/bbdev/meson.build
+++ b/lib/bbdev/meson.build
@@ -1,10 +1,14 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_bbdev.c',
- 'bbdev_trace_points.c')
-headers = files('rte_bbdev.h',
- 'rte_bbdev_pmd.h',
+sources = files(
+ 'rte_bbdev.c',
+ 'bbdev_trace_points.c',
+)
+headers = files(
+ 'rte_bbdev.h',
 'rte_bbdev_op.h',
- 'rte_bbdev_trace_fp.h')
+ 'rte_bbdev_trace_fp.h',
+)
+driver_sdk_headers = files('rte_bbdev_pmd.h')
 
 deps += ['mbuf']
diff --git a/lib/compressdev/rte_comp.h b/lib/compressdev/rte_comp.h
index 96d9b276dd7..c5ff708a4cf 100644
--- a/lib/compressdev/rte_comp.h
+++ b/lib/compressdev/rte_comp.h
@@ -101,6 +101,10 @@ enum rte_comp_op_status {
 * is not an error case. Output data up to op.produced can be used and
 * next op in the stream should continue on from op.consumed+1.
 */
+ RTE_COMP_OP_STATUS_CHECKSUM_VALIDATION_FAILED,
+ /**< Checksum validation failed. Either the calculated checksum does not
+ * match the one provided, or there was an error calculating the checksum.
+ */
 };
 
 /** Compression Algorithms */
@@ -166,6 +170,10 @@ enum rte_comp_checksum_type {
 /**< Generates a xxHash-32 checksum, as used by LZ4.
 * https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md
 */
+ RTE_COMP_CHECKSUM_3GPP_PDCP_UDC,
+ /**< Generates a checksum for Uplink Data Compression (UDC), as defined
+ * in the 3GPP PDCP specification.
+ */
 };
 
 /** Compression Huffman Type - used by DEFLATE algorithm */
@@ -201,6 +209,11 @@ enum rte_comp_flush_flag {
 */
 };
 
+#define DEFLATE_MAX_WINDOW_SIZE (1ULL << 15)
+/** Max window size supported by deflate. Used when setting or getting
+ * pre-defined dictionaries.
+ */
+
 /** Compression transform types */
 enum rte_comp_xform_type {
 RTE_COMP_COMPRESS,
@@ -228,6 +241,18 @@ enum rte_comp_op_type {
 struct rte_comp_deflate_params {
 enum rte_comp_huffman huffman;
 /**< Compression huffman encoding type */
+ uint8_t *dictionary;
+ /**< Pointer to memory containing dictionary to be used for deflate operations */
+ uint16_t dictionary_len;
+ /**< Length of dictionary to be used */
+};
+
+/** Parameters specific to the inflate algorithm */
+struct rte_comp_inflate_params {
+ uint8_t *dictionary;
+ /**< Pointer to memory containing dictionary to be used for inflate operations */
+ uint16_t dictionary_len;
+ /**< Length of dictionary to be used */
 };
 
 /**
@@ -321,6 +346,8 @@ struct rte_comp_decompress_xform {
 * setup of stream or private_xform should fail.
 */
 union {
+ struct rte_comp_inflate_params inflate;
+ /**< Parameters specific to the inflate algorithm */
 struct rte_comp_lz4_params lz4;
 /**< Parameters specific to the LZ4 algorithm */
 }; /**< Algorithm specific parameters */
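
For illustration, a minimal sketch of how an application might carry a preset
dictionary through the new deflate parameters. Assumptions not taken from the
patch: a configured compressdev `dev_id`, a caller-prepared `dict` buffer no
longer than DEFLATE_MAX_WINDOW_SIZE, and the hypothetical helper name
`setup_dict_xform`:

    #include <rte_comp.h>
    #include <rte_compressdev.h>

    /* Create a private xform whose deflate parameters carry a preset
     * dictionary, using the fields added by this patch. */
    static int
    setup_dict_xform(uint8_t dev_id, uint8_t *dict, uint16_t dict_len,
                     void **priv_xform)
    {
        struct rte_comp_xform xform = {
            .type = RTE_COMP_COMPRESS,
            .compress = {
                .algo = RTE_COMP_ALGO_DEFLATE,
                .deflate = {
                    .huffman = RTE_COMP_HUFFMAN_DEFAULT,
                    .dictionary = dict,         /* new field */
                    .dictionary_len = dict_len, /* new field */
                },
                .level = RTE_COMP_LEVEL_PMD_DEFAULT,
                .window_size = 15,
                .chksum = RTE_COMP_CHECKSUM_NONE,
                .hash_algo = RTE_COMP_HASH_ALGO_NONE,
            },
        };

        return rte_compressdev_private_xform_create(dev_id, &xform, priv_xform);
    }
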
diff --git a/lib/eal/arm/include/meson.build b/lib/eal/arm/include/meson.build
index 657bf585699..822ddac5202 100644
--- a/lib/eal/arm/include/meson.build
+++ b/lib/eal/arm/include/meson.build
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation.
 
-arch_headers = files(
+dpdk_arch_headers += files(
 'rte_atomic_32.h',
 'rte_atomic_64.h',
 'rte_atomic.h',
@@ -28,4 +28,3 @@ arch_headers = files(
 'rte_spinlock.h',
 'rte_vect.h',
 )
-install_headers(arch_headers, subdir: get_option('include_subdir_arch'))
diff --git a/lib/eal/arm/include/rte_memcpy_32.h b/lib/eal/arm/include/rte_memcpy_32.h
index 99fd5757ca4..861be06f5e8 100644
--- a/lib/eal/arm/include/rte_memcpy_32.h
+++ b/lib/eal/arm/include/rte_memcpy_32.h
@@ -19,10 +19,14 @@
 /* ARM NEON Intrinsics are used to copy data */
 #include <arm_neon.h>
 
+#endif /* RTE_ARCH_ARM_NEON_MEMCPY */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#ifdef RTE_ARCH_ARM_NEON_MEMCPY
+
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
@@ -252,7 +256,7 @@ rte_memcpy_func(void *dst, const void *src, size_t n)
 return ret;
 }
 
-#else
+#else /* ! 
RTE_ARCH_ARM_NEON_MEMCPY */ static inline void rte_mov16(uint8_t *dst, const uint8_t *src) diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build index d903577caa3..aef5824e5f6 100644 --- a/lib/eal/include/meson.build +++ b/lib/eal/include/meson.build @@ -64,7 +64,7 @@ driver_sdk_headers = files( ) # special case install the generic headers, since they go in a subdir -generic_headers = files( +dpdk_generic_headers += files( 'generic/rte_atomic.h', 'generic/rte_byteorder.h', 'generic/rte_cpuflags.h', @@ -78,4 +78,3 @@ generic_headers = files( 'generic/rte_spinlock.h', 'generic/rte_vect.h', ) -install_headers(generic_headers, subdir: 'generic') diff --git a/lib/eal/include/rte_vfio.h b/lib/eal/include/rte_vfio.h index 923293040b0..80951517fa5 100644 --- a/lib/eal/include/rte_vfio.h +++ b/lib/eal/include/rte_vfio.h @@ -15,100 +15,24 @@ #include -/* - * determine if VFIO is present on the system - */ -#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO) -#include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) -#define VFIO_PRESENT -#endif /* kernel version >= 3.6.0 */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) -#define HAVE_VFIO_DEV_REQ_INTERFACE -#endif /* kernel version >= 4.0.0 */ -#endif /* RTE_EAL_VFIO */ - #ifdef __cplusplus extern "C" { #endif -#ifdef VFIO_PRESENT +#ifdef RTE_EXEC_ENV_LINUX -#include - -#define VFIO_DIR "/dev/vfio" -#define VFIO_CONTAINER_PATH "/dev/vfio/vfio" -#define VFIO_GROUP_FMT "/dev/vfio/%u" -#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" -#define VFIO_GET_REGION_IDX(x) (x >> 40) -#define VFIO_NOIOMMU_MODE \ +#define RTE_VFIO_DIR "/dev/vfio" +#define RTE_VFIO_CONTAINER_PATH "/dev/vfio/vfio" +#define RTE_VFIO_GROUP_FMT "/dev/vfio/%u" +#define RTE_VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" +#define RTE_VFIO_NOIOMMU_MODE \ "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode" -/* NOIOMMU is defined from kernel version 4.5 onwards */ -#ifdef VFIO_NOIOMMU_IOMMU -#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU -#else -#define RTE_VFIO_NOIOMMU 8 -#endif - -/* - * capabilities are only supported on kernel 4.6+. there were also some API - * changes as well, so add a macro to get cap offset. - */ -#ifdef VFIO_REGION_INFO_FLAG_CAPS -#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS -#define VFIO_CAP_OFFSET(x) (x->cap_offset) -#else -#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3) -#define VFIO_CAP_OFFSET(x) (x->resv) -struct vfio_info_cap_header { - uint16_t id; - uint16_t version; - uint32_t next; -}; -#endif - -/* kernels 4.16+ can map BAR containing MSI-X table */ -#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE -#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE -#else -#define RTE_VFIO_CAP_MSIX_MAPPABLE 3 -#endif - -/* VFIO_DEVICE_FEATURE is defined for kernel version 5.7 and newer. 
*/ -#ifdef VFIO_DEVICE_FEATURE -#define RTE_VFIO_DEVICE_FEATURE VFIO_DEVICE_FEATURE -#else -#define RTE_VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) -struct vfio_device_feature { - __u32 argsz; - __u32 flags; -#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */ -#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */ -#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */ -#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */ - __u8 data[]; -}; -#endif - -#ifdef VFIO_DEVICE_FEATURE_BUS_MASTER -#define RTE_VFIO_DEVICE_FEATURE_BUS_MASTER VFIO_DEVICE_FEATURE_BUS_MASTER -#else -#define RTE_VFIO_DEVICE_FEATURE_BUS_MASTER 10 -struct vfio_device_feature_bus_master { - __u32 op; -#define VFIO_DEVICE_FEATURE_CLEAR_MASTER 0 /* Clear Bus Master */ -#define VFIO_DEVICE_FEATURE_SET_MASTER 1 /* Set Bus Master */ -}; -#endif - -#else /* not VFIO_PRESENT */ +#endif /* RTE_EXEC_ENV_LINUX */ /* we don't need an actual definition, only pointer is used */ struct vfio_device_info; -#endif /* VFIO_PRESENT */ - #define RTE_VFIO_DEFAULT_CONTAINER_FD (-1) /** diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c index caf22033d0e..6cd9782f40e 100644 --- a/lib/eal/linux/eal.c +++ b/lib/eal/linux/eal.c @@ -1124,14 +1124,12 @@ rte_eal_init(int argc, char **argv) #endif } -#ifdef VFIO_PRESENT if (rte_vfio_enable("vfio")) { rte_eal_init_alert("Cannot init VFIO"); rte_errno = EAGAIN; rte_atomic_store_explicit(&run_once, 0, rte_memory_order_relaxed); return -1; } -#endif /* in secondary processes, memory init may allocate additional fbarrays * not present in primary processes, so to avoid any potential issues, * initialize memzones first. @@ -1334,9 +1332,7 @@ rte_eal_cleanup(void) rte_service_finalize(); eal_bus_cleanup(); -#ifdef VFIO_PRESENT vfio_mp_sync_cleanup(); -#endif rte_mp_channel_cleanup(); rte_eal_alarm_cleanup(); rte_trace_save(); diff --git a/lib/eal/linux/eal_interrupts.c b/lib/eal/linux/eal_interrupts.c index 4ec78de82c4..9db978923aa 100644 --- a/lib/eal/linux/eal_interrupts.c +++ b/lib/eal/linux/eal_interrupts.c @@ -2,6 +2,8 @@ * Copyright(c) 2010-2014 Intel Corporation */ +#include + #include #include #include @@ -28,7 +30,6 @@ #include #include #include -#include #include "eal_private.h" @@ -57,9 +58,7 @@ union intr_pipefds{ */ union rte_intr_read_buffer { int uio_intr_count; /* for uio device */ -#ifdef VFIO_PRESENT uint64_t vfio_intr_count; /* for vfio device */ -#endif uint64_t timerfd_num; /* for timerfd */ char charbuf[16]; /* for others */ }; @@ -95,8 +94,6 @@ static struct rte_intr_source_list intr_sources; static rte_thread_t intr_thread; /* VFIO interrupts */ -#ifdef VFIO_PRESENT - #define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int)) /* irq set buffer length for queue interrupts and LSC interrupt */ #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ @@ -340,7 +337,6 @@ vfio_disable_msix(const struct rte_intr_handle *intr_handle) { return ret; } -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE /* enable req notifier */ static int vfio_enable_req(const struct rte_intr_handle *intr_handle) @@ -400,8 +396,6 @@ vfio_disable_req(const struct rte_intr_handle *intr_handle) return ret; } -#endif -#endif static int uio_intx_intr_disable(const struct rte_intr_handle *intr_handle) @@ -734,7 +728,6 @@ rte_intr_enable(const struct rte_intr_handle *intr_handle) case RTE_INTR_HANDLE_ALARM: rc = -1; break; -#ifdef VFIO_PRESENT case RTE_INTR_HANDLE_VFIO_MSIX: if (vfio_enable_msix(intr_handle)) rc = -1; @@ -747,13 +740,10 @@ 
rte_intr_enable(const struct rte_intr_handle *intr_handle) if (vfio_enable_intx(intr_handle)) rc = -1; break; -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE case RTE_INTR_HANDLE_VFIO_REQ: if (vfio_enable_req(intr_handle)) rc = -1; break; -#endif -#endif /* not used at this moment */ case RTE_INTR_HANDLE_DEV_EVENT: rc = -1; @@ -807,7 +797,6 @@ rte_intr_ack(const struct rte_intr_handle *intr_handle) /* not used at this moment */ case RTE_INTR_HANDLE_ALARM: return -1; -#ifdef VFIO_PRESENT /* VFIO MSI* is implicitly acked unlike INTx, nothing to do */ case RTE_INTR_HANDLE_VFIO_MSIX: case RTE_INTR_HANDLE_VFIO_MSI: @@ -816,11 +805,8 @@ rte_intr_ack(const struct rte_intr_handle *intr_handle) if (vfio_ack_intx(intr_handle)) return -1; break; -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE case RTE_INTR_HANDLE_VFIO_REQ: return -1; -#endif -#endif /* not used at this moment */ case RTE_INTR_HANDLE_DEV_EVENT: return -1; @@ -868,7 +854,6 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) case RTE_INTR_HANDLE_ALARM: rc = -1; break; -#ifdef VFIO_PRESENT case RTE_INTR_HANDLE_VFIO_MSIX: if (vfio_disable_msix(intr_handle)) rc = -1; @@ -881,13 +866,10 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) if (vfio_disable_intx(intr_handle)) rc = -1; break; -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE case RTE_INTR_HANDLE_VFIO_REQ: if (vfio_disable_req(intr_handle)) rc = -1; break; -#endif -#endif /* not used at this moment */ case RTE_INTR_HANDLE_DEV_EVENT: rc = -1; @@ -948,16 +930,12 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) case RTE_INTR_HANDLE_ALARM: bytes_read = sizeof(buf.timerfd_num); break; -#ifdef VFIO_PRESENT -#ifdef HAVE_VFIO_DEV_REQ_INTERFACE case RTE_INTR_HANDLE_VFIO_REQ: -#endif case RTE_INTR_HANDLE_VFIO_MSIX: case RTE_INTR_HANDLE_VFIO_MSI: case RTE_INTR_HANDLE_VFIO_LEGACY: bytes_read = sizeof(buf.vfio_intr_count); break; -#endif case RTE_INTR_HANDLE_VDEV: case RTE_INTR_HANDLE_EXT: bytes_read = 0; @@ -1221,13 +1199,11 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) case RTE_INTR_HANDLE_UIO_INTX: bytes_read = sizeof(buf.uio_intr_count); break; -#ifdef VFIO_PRESENT case RTE_INTR_HANDLE_VFIO_MSIX: case RTE_INTR_HANDLE_VFIO_MSI: case RTE_INTR_HANDLE_VFIO_LEGACY: bytes_read = sizeof(buf.vfio_intr_count); break; -#endif case RTE_INTR_HANDLE_VDEV: bytes_read = rte_intr_efd_counter_size_get(intr_handle); /* For vdev, number of bytes to read is set by driver */ diff --git a/lib/eal/linux/eal_vfio.c b/lib/eal/linux/eal_vfio.c index 805f0ff92c3..45c13543908 100644 --- a/lib/eal/linux/eal_vfio.c +++ b/lib/eal/linux/eal_vfio.c @@ -2,6 +2,8 @@ * Copyright(c) 2010-2018 Intel Corporation */ +#include + #include #include #include @@ -29,7 +31,7 @@ * was registered by the user themselves, so we need to store the user mappings * somewhere, to recreate them later. 
 */
-#define VFIO_MAX_USER_MEM_MAPS 256
+#define EAL_VFIO_MAX_USER_MEM_MAPS 256
 struct user_mem_map {
 uint64_t addr; /**< start VA */
 uint64_t iova; /**< start IOVA */
@@ -40,7 +42,7 @@ struct user_mem_map {
 struct user_mem_maps {
 rte_spinlock_recursive_t lock;
 int n_maps;
- struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS];
+ struct user_mem_map maps[EAL_VFIO_MAX_USER_MEM_MAPS];
 };
 
 struct vfio_config {
@@ -48,12 +50,12 @@ struct vfio_config {
 int vfio_container_fd;
 int vfio_active_groups;
 const struct vfio_iommu_type *vfio_iommu_type;
- struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
+ struct vfio_group vfio_groups[RTE_MAX_VFIO_GROUPS];
 struct user_mem_maps mem_maps;
 };
 
 /* per-process VFIO config */
-static struct vfio_config vfio_cfgs[VFIO_MAX_CONTAINERS];
+static struct vfio_config vfio_cfgs[RTE_MAX_VFIO_CONTAINERS];
 static struct vfio_config *default_vfio_cfg = &vfio_cfgs[0];
 
 static int vfio_type1_dma_map(int);
@@ -69,7 +71,7 @@ static int vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr,
 static const struct vfio_iommu_type iommu_types[] = {
 /* x86 IOMMU, otherwise known as type 1 */
 {
- .type_id = RTE_VFIO_TYPE1,
+ .type_id = VFIO_TYPE1_IOMMU,
 .name = "Type 1",
 .partial_unmap = false,
 .dma_map_func = &vfio_type1_dma_map,
@@ -77,7 +79,7 @@ static const struct vfio_iommu_type iommu_types[] = {
 },
 /* ppc64 IOMMU, otherwise known as spapr */
 {
- .type_id = RTE_VFIO_SPAPR,
+ .type_id = VFIO_SPAPR_TCE_v2_IOMMU,
 .name = "sPAPR",
 .partial_unmap = true,
 .dma_map_func = &vfio_spapr_dma_map,
@@ -85,7 +87,7 @@ static const struct vfio_iommu_type iommu_types[] = {
 },
 /* IOMMU-less mode */
 {
- .type_id = RTE_VFIO_NOIOMMU,
+ .type_id = VFIO_NOIOMMU_IOMMU,
 .name = "No-IOMMU",
 .partial_unmap = true,
 .dma_map_func = &vfio_noiommu_dma_map,
@@ -183,10 +185,10 @@ static void
 delete_maps(struct user_mem_maps *user_mem_maps, struct user_mem_map *del_maps,
 size_t n_del)
 {
- int i;
+ unsigned int i;
 size_t j;
 
- for (i = 0, j = 0; i < VFIO_MAX_USER_MEM_MAPS && j < n_del; i++) {
+ for (i = 0, j = 0; i < RTE_DIM(user_mem_maps->maps) && j < n_del; i++) {
 struct user_mem_map *left = &user_mem_maps->maps[i];
 struct user_mem_map *right = &del_maps[j];
 
@@ -202,10 +204,10 @@ static void
 copy_maps(struct user_mem_maps *user_mem_maps, struct user_mem_map *add_maps,
 size_t n_add)
 {
- int i;
+ unsigned int i;
 size_t j;
 
- for (i = 0, j = 0; i < VFIO_MAX_USER_MEM_MAPS && j < n_add; i++) {
+ for (i = 0, j = 0; i < RTE_DIM(user_mem_maps->maps) && j < n_add; i++) {
 struct user_mem_map *left = &user_mem_maps->maps[i];
 struct user_mem_map *right = &add_maps[j];
 
@@ -321,13 +323,13 @@ find_user_mem_maps(struct user_mem_maps *user_mem_maps, uint64_t addr,
 static void
 compact_user_maps(struct user_mem_maps *user_mem_maps)
 {
- int i;
+ unsigned int i;
 
- qsort(user_mem_maps->maps, VFIO_MAX_USER_MEM_MAPS,
+ qsort(user_mem_maps->maps, RTE_DIM(user_mem_maps->maps),
 sizeof(user_mem_maps->maps[0]), user_mem_map_cmp);
 
 /* we'll go over the list backwards when merging */
- for (i = VFIO_MAX_USER_MEM_MAPS - 2; i >= 0; i--) {
+ for (i = RTE_DIM(user_mem_maps->maps) - 1; i-- > 0; ) {
 struct user_mem_map *l, *r;
 
 l = &user_mem_maps->maps[i];
@@ -344,7 +346,7 @@ compact_user_maps(struct user_mem_maps *user_mem_maps)
 
 /* the entries are still sorted, but now they have holes in them, so
 * sort the list again.
*/ - qsort(user_mem_maps->maps, VFIO_MAX_USER_MEM_MAPS, + qsort(user_mem_maps->maps, RTE_DIM(user_mem_maps->maps), sizeof(user_mem_maps->maps[0]), user_mem_map_cmp); } @@ -363,8 +365,7 @@ vfio_open_group_fd(int iommu_group_num) /* if primary, try to open the group */ if (internal_conf->process_type == RTE_PROC_PRIMARY) { /* try regular group format */ - snprintf(filename, sizeof(filename), - VFIO_GROUP_FMT, iommu_group_num); + snprintf(filename, sizeof(filename), RTE_VFIO_GROUP_FMT, iommu_group_num); vfio_group_fd = open(filename, O_RDWR); if (vfio_group_fd < 0) { /* if file not found, it's not an error */ @@ -375,9 +376,8 @@ vfio_open_group_fd(int iommu_group_num) } /* special case: try no-IOMMU path as well */ - snprintf(filename, sizeof(filename), - VFIO_NOIOMMU_GROUP_FMT, - iommu_group_num); + snprintf(filename, sizeof(filename), RTE_VFIO_NOIOMMU_GROUP_FMT, + iommu_group_num); vfio_group_fd = open(filename, O_RDWR); if (vfio_group_fd < 0) { if (errno != ENOENT) { @@ -425,11 +425,11 @@ static struct vfio_config * get_vfio_cfg_by_group_num(int iommu_group_num) { struct vfio_config *vfio_cfg; - int i, j; + unsigned int i, j; - for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + for (i = 0; i < RTE_DIM(vfio_cfgs); i++) { vfio_cfg = &vfio_cfgs[i]; - for (j = 0; j < VFIO_MAX_GROUPS; j++) { + for (j = 0; j < RTE_DIM(vfio_cfg->vfio_groups); j++) { if (vfio_cfg->vfio_groups[j].group_num == iommu_group_num) return vfio_cfg; @@ -443,30 +443,30 @@ static int vfio_get_group_fd(struct vfio_config *vfio_cfg, int iommu_group_num) { - int i; + struct vfio_group *cur_grp = NULL; int vfio_group_fd; - struct vfio_group *cur_grp; + unsigned int i; /* check if we already have the group descriptor open */ - for (i = 0; i < VFIO_MAX_GROUPS; i++) + for (i = 0; i < RTE_DIM(vfio_cfg->vfio_groups); i++) if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) return vfio_cfg->vfio_groups[i].fd; /* Lets see first if there is room for a new group */ - if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) { + if (vfio_cfg->vfio_active_groups == RTE_DIM(vfio_cfg->vfio_groups)) { EAL_LOG(ERR, "Maximum number of VFIO groups reached!"); return -1; } /* Now lets get an index for the new group */ - for (i = 0; i < VFIO_MAX_GROUPS; i++) + for (i = 0; i < RTE_DIM(vfio_cfg->vfio_groups); i++) if (vfio_cfg->vfio_groups[i].group_num == -1) { cur_grp = &vfio_cfg->vfio_groups[i]; break; } /* This should not happen */ - if (i == VFIO_MAX_GROUPS) { + if (cur_grp == NULL) { EAL_LOG(ERR, "No VFIO group free slot found"); return -1; } @@ -489,11 +489,11 @@ static struct vfio_config * get_vfio_cfg_by_group_fd(int vfio_group_fd) { struct vfio_config *vfio_cfg; - int i, j; + unsigned int i, j; - for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + for (i = 0; i < RTE_DIM(vfio_cfgs); i++) { vfio_cfg = &vfio_cfgs[i]; - for (j = 0; j < VFIO_MAX_GROUPS; j++) + for (j = 0; j < RTE_DIM(vfio_cfg->vfio_groups); j++) if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) return vfio_cfg; } @@ -504,12 +504,12 @@ get_vfio_cfg_by_group_fd(int vfio_group_fd) static struct vfio_config * get_vfio_cfg_by_container_fd(int container_fd) { - int i; + unsigned int i; if (container_fd == RTE_VFIO_DEFAULT_CONTAINER_FD) return default_vfio_cfg; - for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + for (i = 0; i < RTE_DIM(vfio_cfgs); i++) { if (vfio_cfgs[i].vfio_container_fd == container_fd) return &vfio_cfgs[i]; } @@ -534,11 +534,11 @@ static int get_vfio_group_idx(int vfio_group_fd) { struct vfio_config *vfio_cfg; - int i, j; + unsigned int i, j; - for (i = 0; i < 
VFIO_MAX_CONTAINERS; i++) { + for (i = 0; i < RTE_DIM(vfio_cfgs); i++) { vfio_cfg = &vfio_cfgs[i]; - for (j = 0; j < VFIO_MAX_GROUPS; j++) + for (j = 0; j < RTE_DIM(vfio_cfg->vfio_groups); j++) if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) return j; } @@ -559,7 +559,7 @@ vfio_group_device_get(int vfio_group_fd) } i = get_vfio_group_idx(vfio_group_fd); - if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) + if (i < 0) EAL_LOG(ERR, "Wrong VFIO group index (%d)", i); else vfio_cfg->vfio_groups[i].devices++; @@ -578,7 +578,7 @@ vfio_group_device_put(int vfio_group_fd) } i = get_vfio_group_idx(vfio_group_fd); - if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) + if (i < 0) EAL_LOG(ERR, "Wrong VFIO group index (%d)", i); else vfio_cfg->vfio_groups[i].devices--; @@ -597,7 +597,7 @@ vfio_group_device_count(int vfio_group_fd) } i = get_vfio_group_idx(vfio_group_fd); - if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) { + if (i < 0) { EAL_LOG(ERR, "Wrong VFIO group index (%d)", i); return -1; } @@ -1088,7 +1088,7 @@ int rte_vfio_enable(const char *modname) { /* initialize group list */ - int i, j; + unsigned int i, j; int vfio_available; DIR *dir; const struct internal_config *internal_conf = @@ -1096,13 +1096,13 @@ rte_vfio_enable(const char *modname) rte_spinlock_recursive_t lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER; - for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + for (i = 0; i < RTE_DIM(vfio_cfgs); i++) { vfio_cfgs[i].vfio_container_fd = -1; vfio_cfgs[i].vfio_active_groups = 0; vfio_cfgs[i].vfio_iommu_type = NULL; vfio_cfgs[i].mem_maps.lock = lock; - for (j = 0; j < VFIO_MAX_GROUPS; j++) { + for (j = 0; j < RTE_DIM(vfio_cfgs[i].vfio_groups); j++) { vfio_cfgs[i].vfio_groups[j].fd = -1; vfio_cfgs[i].vfio_groups[j].group_num = -1; vfio_cfgs[i].vfio_groups[j].devices = 0; @@ -1128,7 +1128,7 @@ rte_vfio_enable(const char *modname) } /* VFIO directory might not exist (e.g., unprivileged containers) */ - dir = opendir(VFIO_DIR); + dir = opendir(RTE_VFIO_DIR); if (dir == NULL) { EAL_LOG(DEBUG, "VFIO directory does not exist, skipping VFIO support..."); @@ -1315,15 +1315,12 @@ rte_vfio_get_container_fd(void) const struct internal_config *internal_conf = eal_get_internal_configuration(); - /* if we're in a primary process, try to open the container */ if (internal_conf->process_type == RTE_PROC_PRIMARY) { - vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR); + vfio_container_fd = open(RTE_VFIO_CONTAINER_PATH, O_RDWR); if (vfio_container_fd < 0) { - EAL_LOG(ERR, - "Cannot open VFIO container %s, error " - "%i (%s)", VFIO_CONTAINER_PATH, - errno, strerror(errno)); + EAL_LOG(ERR, "Cannot open VFIO container %s, error %i (%s)", + RTE_VFIO_CONTAINER_PATH, errno, strerror(errno)); return -1; } @@ -1900,7 +1897,7 @@ container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, user_mem_maps = &vfio_cfg->mem_maps; rte_spinlock_recursive_lock(&user_mem_maps->lock); - if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) { + if (user_mem_maps->n_maps == RTE_DIM(user_mem_maps->maps)) { EAL_LOG(ERR, "No more space for user mem maps"); rte_errno = ENOMEM; ret = -1; @@ -1940,11 +1937,12 @@ static int container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, uint64_t len) { - struct user_mem_map orig_maps[VFIO_MAX_USER_MEM_MAPS]; + struct user_mem_map orig_maps[RTE_DIM(vfio_cfg->mem_maps.maps)]; struct user_mem_map new_maps[2]; /* can be at most 2 */ struct user_mem_maps *user_mem_maps; - int n_orig, n_new, newlen, ret = 0; + int n_orig, n_new, ret = 0; bool has_partial_unmap; + unsigned int newlen; 
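+ /* unsigned: compared against RTE_DIM(user_mem_maps->maps) in the capacity check below */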
user_mem_maps = &vfio_cfg->mem_maps; rte_spinlock_recursive_lock(&user_mem_maps->lock); @@ -2010,7 +2008,7 @@ container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, /* can we store the new maps in our list? */ newlen = (user_mem_maps->n_maps - n_orig) + n_new; - if (newlen >= VFIO_MAX_USER_MEM_MAPS) { + if (newlen >= RTE_DIM(user_mem_maps->maps)) { EAL_LOG(ERR, "Not enough space to store partial mapping"); rte_errno = ENOMEM; ret = -1; @@ -2053,7 +2051,7 @@ rte_vfio_noiommu_is_enabled(void) ssize_t cnt; char c; - fd = open(VFIO_NOIOMMU_MODE, O_RDONLY); + fd = open(RTE_VFIO_NOIOMMU_MODE, O_RDONLY); if (fd < 0) { if (errno != ENOENT) { EAL_LOG(ERR, "Cannot open VFIO noiommu file " @@ -2082,15 +2080,15 @@ RTE_EXPORT_SYMBOL(rte_vfio_container_create) int rte_vfio_container_create(void) { - int i; + unsigned int i; /* Find an empty slot to store new vfio config */ - for (i = 1; i < VFIO_MAX_CONTAINERS; i++) { + for (i = 1; i < RTE_DIM(vfio_cfgs); i++) { if (vfio_cfgs[i].vfio_container_fd == -1) break; } - if (i == VFIO_MAX_CONTAINERS) { + if (i == RTE_DIM(vfio_cfgs)) { EAL_LOG(ERR, "Exceed max VFIO container limit"); return -1; } @@ -2109,7 +2107,7 @@ int rte_vfio_container_destroy(int container_fd) { struct vfio_config *vfio_cfg; - int i; + unsigned int i; vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); if (vfio_cfg == NULL) { @@ -2117,7 +2115,7 @@ rte_vfio_container_destroy(int container_fd) return -1; } - for (i = 0; i < VFIO_MAX_GROUPS; i++) + for (i = 0; i < RTE_DIM(vfio_cfg->vfio_groups); i++) if (vfio_cfg->vfio_groups[i].group_num != -1) rte_vfio_container_group_unbind(container_fd, vfio_cfg->vfio_groups[i].group_num); @@ -2149,9 +2147,9 @@ RTE_EXPORT_SYMBOL(rte_vfio_container_group_unbind) int rte_vfio_container_group_unbind(int container_fd, int iommu_group_num) { - struct vfio_config *vfio_cfg; struct vfio_group *cur_grp = NULL; - int i; + struct vfio_config *vfio_cfg; + unsigned int i; vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); if (vfio_cfg == NULL) { @@ -2159,7 +2157,7 @@ rte_vfio_container_group_unbind(int container_fd, int iommu_group_num) return -1; } - for (i = 0; i < VFIO_MAX_GROUPS; i++) { + for (i = 0; i < RTE_DIM(vfio_cfg->vfio_groups); i++) { if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) { cur_grp = &vfio_cfg->vfio_groups[i]; break; @@ -2167,7 +2165,7 @@ rte_vfio_container_group_unbind(int container_fd, int iommu_group_num) } /* This should not happen */ - if (i == VFIO_MAX_GROUPS || cur_grp == NULL) { + if (cur_grp == NULL) { EAL_LOG(ERR, "Specified VFIO group number not found"); return -1; } diff --git a/lib/eal/linux/eal_vfio.h b/lib/eal/linux/eal_vfio.h index 23a787ad20c..5c5742b4296 100644 --- a/lib/eal/linux/eal_vfio.h +++ b/lib/eal/linux/eal_vfio.h @@ -7,84 +7,7 @@ #include -/* - * determine if VFIO is present on the system - */ -#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO) -#include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) -#define VFIO_PRESENT -#else -#pragma message("VFIO configured but not supported by this kernel, disabling.") -#endif /* kernel version >= 3.6.0 */ -#endif /* RTE_EAL_VFIO */ - -#ifdef VFIO_PRESENT - #include -#include - -#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU - -#ifndef VFIO_SPAPR_TCE_v2_IOMMU -#define RTE_VFIO_SPAPR 7 -#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) -#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) -#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) -#define VFIO_IOMMU_SPAPR_TCE_REMOVE 
_IO(VFIO_TYPE, VFIO_BASE + 20) - -struct vfio_iommu_spapr_register_memory { - uint32_t argsz; - uint32_t flags; - uint64_t vaddr; - uint64_t size; -}; - -struct vfio_iommu_spapr_tce_create { - uint32_t argsz; - uint32_t flags; - /* in */ - uint32_t page_shift; - uint32_t __resv1; - uint64_t window_size; - uint32_t levels; - uint32_t __resv2; - /* out */ - uint64_t start_addr; -}; - -struct vfio_iommu_spapr_tce_remove { - uint32_t argsz; - uint32_t flags; - /* in */ - uint64_t start_addr; -}; - -struct vfio_iommu_spapr_tce_ddw_info { - uint64_t pgsizes; - uint32_t max_dynamic_windows_supported; - uint32_t levels; -}; - -/* SPAPR_v2 is not present, but SPAPR might be */ -#ifndef VFIO_SPAPR_TCE_IOMMU -#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) - -struct vfio_iommu_spapr_tce_info { - uint32_t argsz; - uint32_t flags; - uint32_t dma32_window_start; - uint32_t dma32_window_size; - struct vfio_iommu_spapr_tce_ddw_info ddw; -}; -#endif /* VFIO_SPAPR_TCE_IOMMU */ - -#else /* VFIO_SPAPR_TCE_v2_IOMMU */ -#define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU -#endif - -#define VFIO_MAX_GROUPS RTE_MAX_VFIO_GROUPS -#define VFIO_MAX_CONTAINERS RTE_MAX_VFIO_CONTAINERS /* * we don't need to store device fd's anywhere since they can be obtained from @@ -154,6 +77,4 @@ struct vfio_mp_param { }; }; -#endif /* VFIO_PRESENT */ - #endif /* EAL_VFIO_H_ */ diff --git a/lib/eal/linux/eal_vfio_mp_sync.c b/lib/eal/linux/eal_vfio_mp_sync.c index ce14e260fe4..8230f3d24d5 100644 --- a/lib/eal/linux/eal_vfio_mp_sync.c +++ b/lib/eal/linux/eal_vfio_mp_sync.c @@ -14,15 +14,6 @@ #include "eal_private.h" #include "eal_vfio.h" -/** - * @file - * VFIO socket for communication between primary and secondary processes. - * - * This file is only compiled if RTE_EAL_VFIO is set. 
- */ - -#ifdef VFIO_PRESENT - static int vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer) { @@ -129,4 +120,3 @@ vfio_mp_sync_cleanup(void) rte_mp_action_unregister(EAL_VFIO_MP); } -#endif diff --git a/lib/eal/loongarch/include/meson.build b/lib/eal/loongarch/include/meson.build index 6e8d12601a0..574aa6dac22 100644 --- a/lib/eal/loongarch/include/meson.build +++ b/lib/eal/loongarch/include/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2022 Loongson Technology Corporation Limited -arch_headers = files( +dpdk_arch_headers += files( 'rte_atomic.h', 'rte_byteorder.h', 'rte_cpuflags.h', @@ -15,4 +15,3 @@ arch_headers = files( 'rte_spinlock.h', 'rte_vect.h', ) -install_headers(arch_headers, subdir: get_option('include_subdir_arch')) diff --git a/lib/eal/ppc/include/meson.build b/lib/eal/ppc/include/meson.build index fa64330f012..87887187b82 100644 --- a/lib/eal/ppc/include/meson.build +++ b/lib/eal/ppc/include/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2018 Luca Boccassi -arch_headers = files( +dpdk_arch_headers += files( 'rte_altivec.h', 'rte_atomic.h', 'rte_byteorder.h', @@ -16,4 +16,3 @@ arch_headers = files( 'rte_spinlock.h', 'rte_vect.h', ) -install_headers(arch_headers, subdir: get_option('include_subdir_arch')) diff --git a/lib/eal/riscv/include/meson.build b/lib/eal/riscv/include/meson.build index 481c7d50a44..a7a387fb8ac 100644 --- a/lib/eal/riscv/include/meson.build +++ b/lib/eal/riscv/include/meson.build @@ -3,7 +3,7 @@ # Copyright(c) 2022 SiFive # Copyright(c) 2022 Semihalf -arch_headers = files( +dpdk_arch_headers += files( 'rte_atomic.h', 'rte_byteorder.h', 'rte_cpuflags.h', @@ -17,4 +17,3 @@ arch_headers = files( 'rte_spinlock.h', 'rte_vect.h', ) -install_headers(arch_headers, subdir: get_option('include_subdir_arch')) diff --git a/lib/eal/windows/meson.build b/lib/eal/windows/meson.build index c526ede405c..afa029d1fc6 100644 --- a/lib/eal/windows/meson.build +++ b/lib/eal/windows/meson.build @@ -24,12 +24,24 @@ sources += files( dpdk_conf.set10('RTE_EAL_NUMA_AWARE_HUGEPAGES', true) -ext_deps += [ - cc.find_library('dbghelp'), - cc.find_library('setupapi'), - cc.find_library('ws2_32'), -] +dbghelp_dep = cc.find_library('dbghelp') +if not cc.links(min_c_code, dependencies: dbghelp_dep) + error('broken dependency, "dbghelp"') +endif +setupapi_dep = cc.find_library('setupapi') +if not cc.links(min_c_code, dependencies: setupapi_dep) + error('broken dependency, "setupapi"') +endif +ws2_32_dep = cc.find_library('ws2_32') +if not cc.links(min_c_code, dependencies: ws2_32_dep) + error('broken dependency, "ws2_32"') +endif +ext_deps += [dbghelp_dep, setupapi_dep, ws2_32_dep] if is_ms_linker # Contrary to docs, VirtualAlloc2() is exported by mincore.lib. 
- ext_deps += cc.find_library('mincore') + mincore_dep = cc.find_library('mincore') + if not cc.links(min_c_code, dependencies: mincore_dep) + error('broken dependency, "mincore"') + endif + ext_deps += mincore_dep endif diff --git a/lib/eal/x86/include/meson.build b/lib/eal/x86/include/meson.build index 52d2f8e9699..71f149e8214 100644 --- a/lib/eal/x86/include/meson.build +++ b/lib/eal/x86/include/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -arch_headers = files( +dpdk_arch_headers += files( 'rte_atomic.h', 'rte_byteorder.h', 'rte_cpuflags.h', @@ -16,11 +16,9 @@ arch_headers = files( 'rte_spinlock.h', 'rte_vect.h', ) -arch_indirect_headers = files( +dpdk_arch_indirect_headers += files( 'rte_atomic_32.h', 'rte_atomic_64.h', 'rte_byteorder_32.h', 'rte_byteorder_64.h', ) -install_headers(arch_headers + arch_indirect_headers, subdir: get_option('include_subdir_arch')) -dpdk_chkinc_headers += arch_headers diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build index f1d25865917..8ba6c708a24 100644 --- a/lib/ethdev/meson.build +++ b/lib/ethdev/meson.build @@ -26,11 +26,8 @@ headers = files( 'rte_ethdev_trace_fp.h', 'rte_dev_info.h', 'rte_flow.h', - 'rte_flow_driver.h', 'rte_mtr.h', - 'rte_mtr_driver.h', 'rte_tm.h', - 'rte_tm_driver.h', ) indirect_headers += files( @@ -42,6 +39,9 @@ driver_sdk_headers += files( 'ethdev_driver.h', 'ethdev_pci.h', 'ethdev_vdev.h', + 'rte_flow_driver.h', + 'rte_mtr_driver.h', + 'rte_tm_driver.h', ) if is_linux diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h index 5c480b82ffc..34142f26db2 100644 --- a/lib/eventdev/rte_event_dma_adapter.h +++ b/lib/eventdev/rte_event_dma_adapter.h @@ -144,7 +144,7 @@ #include #include -#include +#include #include #ifdef __cplusplus diff --git a/lib/gpudev/gpudev.c b/lib/gpudev/gpudev.c index 0473d9ffb34..4a2335834c0 100644 --- a/lib/gpudev/gpudev.c +++ b/lib/gpudev/gpudev.c @@ -3,6 +3,7 @@ */ #include +#include #include #include diff --git a/lib/gpudev/gpudev_driver.h b/lib/gpudev/gpudev_driver.h index 37b6ae31497..b7621f6e5af 100644 --- a/lib/gpudev/gpudev_driver.h +++ b/lib/gpudev/gpudev_driver.h @@ -12,11 +12,11 @@ #define RTE_GPUDEV_DRIVER_H #include -#include #include #include +#include #include "rte_gpudev.h" #ifdef __cplusplus @@ -80,7 +80,7 @@ struct __rte_cache_aligned rte_gpu { /* Driver functions. */ struct rte_gpu_ops ops; /* Event callback list. */ - TAILQ_HEAD(rte_gpu_callback_list, rte_gpu_callback) callbacks; + RTE_TAILQ_HEAD(rte_gpu_callback_list, rte_gpu_callback) callbacks; /* Current state (used or not) in the running process. */ enum rte_gpu_state process_state; /* Updated by this library. */ /* Driver-specific private data for the running process. 
*/ diff --git a/lib/meson.build b/lib/meson.build index a67efaf718a..d79740a4c35 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -208,13 +208,9 @@ foreach l:libraries dpdk_libs_enabled += name dpdk_conf.set('RTE_LIB_' + name.to_upper(), 1) - install_headers(headers) - install_headers(indirect_headers) - if get_option('enable_driver_sdk') - install_headers(driver_sdk_headers) - endif - dpdk_chkinc_headers += headers - dpdk_chkinc_headers += driver_sdk_headers + dpdk_headers += headers + dpdk_indirect_headers += indirect_headers + dpdk_drivers_headers += driver_sdk_headers libname = 'rte_' + name includes += include_directories(l) diff --git a/lib/mldev/meson.build b/lib/mldev/meson.build index 0079ccd2052..39f1fba927b 100644 --- a/lib/mldev/meson.build +++ b/lib/mldev/meson.build @@ -26,11 +26,8 @@ headers = files( 'rte_mldev.h', ) -indirect_headers += files( - 'rte_mldev_core.h', -) - driver_sdk_headers += files( + 'rte_mldev_core.h', 'rte_mldev_pmd.h', 'mldev_utils.h', ) diff --git a/lib/power/meson.build b/lib/power/meson.build index 56b59071ea0..d89fb555148 100644 --- a/lib/power/meson.build +++ b/lib/power/meson.build @@ -22,12 +22,15 @@ sources = files( 'rte_power_uncore.c', ) headers = files( - 'power_cpufreq.h', - 'power_uncore_ops.h', 'rte_power_cpufreq.h', 'rte_power_pmd_mgmt.h', 'rte_power_qos.h', 'rte_power_uncore.h', ) +driver_sdk_headers = files( + 'power_common.h', + 'power_cpufreq.h', + 'power_uncore_ops.h', +) deps += ['timer', 'ethdev'] diff --git a/lib/power/power_cpufreq.h b/lib/power/power_cpufreq.h index 92f1ab8f379..fb0b7feb824 100644 --- a/lib/power/power_cpufreq.h +++ b/lib/power/power_cpufreq.h @@ -14,6 +14,7 @@ #include #include #include +#include #define RTE_POWER_DRIVER_NAMESZ 24 @@ -131,19 +132,6 @@ typedef int (*rte_power_freq_change_t)(unsigned int lcore_id); * - Negative on error. */ -/** - * Power capabilities summary. - */ -struct rte_power_core_capabilities { - union { - uint64_t capabilities; - struct { - uint64_t turbo:1; /**< Turbo can be enabled. */ - uint64_t priority:1; /**< SST-BF high freq core */ - }; - }; -}; - typedef int (*rte_power_get_capabilities_t)(unsigned int lcore_id, struct rte_power_core_capabilities *caps); diff --git a/lib/power/power_uncore_ops.h b/lib/power/power_uncore_ops.h index b92af28df97..783860ee5b7 100644 --- a/lib/power/power_uncore_ops.h +++ b/lib/power/power_uncore_ops.h @@ -13,6 +13,7 @@ #include #include +#include #define RTE_POWER_UNCORE_DRIVER_NAMESZ 24 diff --git a/lib/power/rte_power_cpufreq.c b/lib/power/rte_power_cpufreq.c index d4db03a4e57..f63e976dc24 100644 --- a/lib/power/rte_power_cpufreq.c +++ b/lib/power/rte_power_cpufreq.c @@ -6,8 +6,8 @@ #include #include -#include "rte_power_cpufreq.h" #include "power_common.h" +#include "power_cpufreq.h" static enum power_management_env global_default_env = PM_ENV_NOT_SET; static struct rte_power_cpufreq_ops *global_cpufreq_ops; diff --git a/lib/power/rte_power_cpufreq.h b/lib/power/rte_power_cpufreq.h index 82d274214b3..1605ba866a7 100644 --- a/lib/power/rte_power_cpufreq.h +++ b/lib/power/rte_power_cpufreq.h @@ -14,8 +14,6 @@ #include #include -#include "power_cpufreq.h" - #ifdef __cplusplus extern "C" { #endif @@ -248,6 +246,19 @@ int rte_power_freq_enable_turbo(unsigned int lcore_id); */ int rte_power_freq_disable_turbo(unsigned int lcore_id); +/** + * Power capabilities summary. + */ +struct rte_power_core_capabilities { + union { + uint64_t capabilities; + struct { + uint64_t turbo:1; /**< Turbo can be enabled. 
*/ + uint64_t priority:1; /**< SST-BF high freq core */ + }; + }; +}; + /** * Returns power capabilities for a specific lcore. * Function pointer definition. Review each environments diff --git a/lib/power/rte_power_uncore.c b/lib/power/rte_power_uncore.c index 30cd3741276..25bdb113c55 100644 --- a/lib/power/rte_power_uncore.c +++ b/lib/power/rte_power_uncore.c @@ -7,8 +7,8 @@ #include #include -#include "rte_power_uncore.h" #include "power_common.h" +#include "power_uncore_ops.h" static enum rte_uncore_power_mgmt_env global_uncore_env = RTE_UNCORE_PM_ENV_NOT_SET; static struct rte_power_uncore_ops *global_uncore_ops; diff --git a/lib/power/rte_power_uncore.h b/lib/power/rte_power_uncore.h index dfeade77e92..66aea1b37f0 100644 --- a/lib/power/rte_power_uncore.h +++ b/lib/power/rte_power_uncore.h @@ -11,7 +11,8 @@ * Uncore Frequency Management */ -#include "power_uncore_ops.h" +#include +#include #ifdef __cplusplus extern "C" { diff --git a/lib/rawdev/meson.build b/lib/rawdev/meson.build index 7dfc3d5cf94..ccfd922fdad 100644 --- a/lib/rawdev/meson.build +++ b/lib/rawdev/meson.build @@ -8,6 +8,7 @@ if is_windows endif sources = files('rte_rawdev.c') -headers = files('rte_rawdev.h', 'rte_rawdev_pmd.h') +headers = files('rte_rawdev.h') +driver_sdk_headers = files('rte_rawdev_pmd.h') deps += ['telemetry'] diff --git a/lib/regexdev/meson.build b/lib/regexdev/meson.build index 7e12d8cd6d0..2684da38253 100644 --- a/lib/regexdev/meson.build +++ b/lib/regexdev/meson.build @@ -2,6 +2,7 @@ # Copyright 2020 Mellanox Technologies, Ltd sources = files('rte_regexdev.c') -headers = files('rte_regexdev.h', 'rte_regexdev_driver.h') +headers = files('rte_regexdev.h') indirect_headers += files('rte_regexdev_core.h') +driver_sdk_headers = files('rte_regexdev_driver.h') deps += ['mbuf'] diff --git a/lib/security/meson.build b/lib/security/meson.build index 1034a7a299d..d5431d472cc 100644 --- a/lib/security/meson.build +++ b/lib/security/meson.build @@ -2,5 +2,6 @@ # Copyright(c) 2017-2019 Intel Corporation sources = files('rte_security.c') -headers = files('rte_security.h', 'rte_security_driver.h') +headers = files('rte_security.h') +driver_sdk_headers = files('rte_security_driver.h') deps += ['mempool', 'cryptodev', 'net'] diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c index 9de7f04a4f0..2015ae9f21a 100644 --- a/lib/vhost/vduse.c +++ b/lib/vhost/vduse.c @@ -2,13 +2,13 @@ * Copyright (c) 2023 Red Hat, Inc. 
*/ +#include + #include #include #include #include - -#include #include #include diff --git a/meson.build b/meson.build index 2423884df7f..b01010ffa07 100644 --- a/meson.build +++ b/meson.build @@ -37,7 +37,12 @@ dpdk_libraries = [] dpdk_static_libraries = [] dpdk_shared_lib_deps = [] dpdk_static_lib_deps = [] -dpdk_chkinc_headers = [] +dpdk_arch_headers = [] +dpdk_arch_indirect_headers = [] +dpdk_generic_headers = [] +dpdk_headers = [] +dpdk_indirect_headers = [] +dpdk_drivers_headers = [] dpdk_driver_classes = [] dpdk_drivers = [] dpdk_extra_ldflags = [] @@ -51,6 +56,9 @@ testpmd_drivers_sources = [] testpmd_drivers_deps = [] abi_version_file = files('ABI_VERSION') +# minimal C code for testing library linking compatibility +min_c_code = 'int main(void) { return 0; }' + if host_machine.cpu_family().startswith('x86') arch_subdir = 'x86' elif host_machine.cpu_family().startswith('arm') or host_machine.cpu_family().startswith('aarch') @@ -64,9 +72,9 @@ elif host_machine.cpu_family().startswith('riscv') endif # configure the build, and make sure configs here and in config folder are -# able to be included in any file. We also store a global array of include dirs -# for passing to pmdinfogen scripts -global_inc = [include_directories('.', 'config', +# able to be included in any file +config_inc = [include_directories('.', 'config')] +global_inc = [config_inc, include_directories( 'lib/eal/include', 'lib/eal/@0@/include'.format(host_machine.system()), 'lib/eal/@0@/include'.format(arch_subdir), @@ -106,6 +114,14 @@ if get_option('check_includes') subdir('buildtools/chkincs') endif +install_headers(dpdk_arch_headers + dpdk_arch_indirect_headers, + subdir: get_option('include_subdir_arch')) +install_headers(dpdk_headers + dpdk_indirect_headers) +install_headers(dpdk_generic_headers, subdir: 'generic') +if get_option('enable_driver_sdk') + install_headers(dpdk_drivers_headers) +endif + # write the build config build_cfg = 'rte_build_config.h' configure_file(output: build_cfg,