From ef2ce32760dc54c2c9e844d09ed2fe03fcdb57f0 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 6 Apr 2023 11:27:06 +0200 Subject: [PATCH] [tools] add unified runtime tracing tool urtrace is a command-line tool for tracing and profiling Unified Runtime library calls in a process. It can be used to inspect the function arguments of each call, and provides options for filtering, output formatting, and profiling. --- .github/workflows/cmake.yml | 4 +- CMakeLists.txt | 4 + README.md | 1 + cmake/match.cmake | 17 +- cmake/match.py | 41 +++ examples/hello_world/hello_world.cpp | 2 +- source/common/logger/ur_logger.hpp | 17 +- source/common/logger/ur_logger_details.hpp | 4 + source/common/logger/ur_sinks.hpp | 45 ++- source/common/ur_util.hpp | 10 +- test/CMakeLists.txt | 3 + test/layers/tracing/hello_world.out.match | 2 +- test/tools/CMakeLists.txt | 6 + test/tools/urtrace/CMakeLists.txt | 23 ++ test/tools/urtrace/null_hello.match | 12 + test/tools/urtrace/null_hello_begin.match | 21 ++ .../urtrace/null_hello_filter_device.match | 7 + test/tools/urtrace/null_hello_no_args.match | 12 + test/tools/urtrace/null_hello_profiling.match | 12 + tools/CMakeLists.txt | 8 + tools/urtrace/CMakeLists.txt | 26 ++ tools/urtrace/README.md | 35 ++ tools/urtrace/collector.cpp | 310 ++++++++++++++++++ tools/urtrace/urtrace | 135 ++++++++ 24 files changed, 718 insertions(+), 39 deletions(-) create mode 100755 cmake/match.py create mode 100644 test/tools/CMakeLists.txt create mode 100644 test/tools/urtrace/CMakeLists.txt create mode 100644 test/tools/urtrace/null_hello.match create mode 100644 test/tools/urtrace/null_hello_begin.match create mode 100644 test/tools/urtrace/null_hello_filter_device.match create mode 100644 test/tools/urtrace/null_hello_no_args.match create mode 100644 test/tools/urtrace/null_hello_profiling.match create mode 100644 tools/CMakeLists.txt create mode 100644 tools/urtrace/CMakeLists.txt create mode 100644 tools/urtrace/README.md create mode 100644 tools/urtrace/collector.cpp create mode 100755 tools/urtrace/urtrace diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 713247ba71..9b3c34971b 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -35,7 +35,7 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build - run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure -L "python|uma|loader|validation|tracing|unit" + run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure -L "python|uma|loader|validation|tracing|unit|urtrace" windows-build: name: Build - Windows @@ -62,4 +62,4 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build - run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure -L "python|uma|loader|validation|tracing|unit" + run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure -L "python|uma|loader|validation|tracing|unit|urtrace" diff --git a/CMakeLists.txt b/CMakeLists.txt index 22705b2709..14b62bae6a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(UR_USE_MSAN "enable MemorySanitizer" OFF) option(UMA_BUILD_SHARED_LIBRARY "Build UMA as shared library" OFF) option(UR_ENABLE_TRACING "enable api tracing through xpti" OFF) option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF) +option(UR_BUILD_TOOLS "build ur tools" ON) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) @@ -181,6 +182,9 @@ add_subdirectory(examples) if(UR_BUILD_TESTS) add_subdirectory(test) endif() +if(UR_BUILD_TOOLS) + add_subdirectory(tools) +endif() # Add the list of installed targets to the install. This includes the namespace # which all installed targets will be prefixed with, e.g. for the headers diff --git a/README.md b/README.md index c9a0c41296..6b34d4d299 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,7 @@ List of options provided by CMake: | UR_USE_UBSAN | Enable UndefinedBehavior Sanitizer | ON/OFF | OFF | | UR_USE_MSAN | Enable MemorySanitizer (clang only) | ON/OFF | OFF | | UR_ENABLE_TRACING | Enable XPTI-based tracing layer | ON/OFF | OFF | +| UR_BUILD_TOOLS | Build tools | ON/OFF | ON | **General**: diff --git a/cmake/match.cmake b/cmake/match.cmake index 3130cac045..01ed35465f 100644 --- a/cmake/match.cmake +++ b/cmake/match.cmake @@ -6,6 +6,8 @@ # with a known good match file. # +find_package(Python3 COMPONENTS Interpreter) + if(NOT DEFINED TEST_FILE) message(FATAL_ERROR "TEST_FILE needs to be defined") endif() @@ -15,23 +17,30 @@ endif() set(TEST_OUT "_matchtmpfile") +if(NOT DEFINED TEST_ARGS) # easier than ifdefing the rest of the code + set(TEST_ARGS "") +endif() + +string(REPLACE "\"" "" TEST_ARGS "${TEST_ARGS}") +separate_arguments(TEST_ARGS) + execute_process( - COMMAND ${TEST_FILE} + COMMAND ${TEST_FILE} ${TEST_ARGS} OUTPUT_FILE ${TEST_OUT} RESULT_VARIABLE TEST_RESULT ) if(TEST_RESULT) - message(FATAL_ERROR "Failed: Test ${TEST_FILE} returned non-zero (${TEST_ERROR}).") + message(FATAL_ERROR "Failed: Test ${TEST_FILE} ${TEST_ARGS} returned non-zero (${TEST_RESULT}).") endif() execute_process( - COMMAND ${CMAKE_COMMAND} -E compare_files ${TEST_OUT} ${MATCH_FILE} + COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/match.py ${TEST_OUT} ${MATCH_FILE} RESULT_VARIABLE TEST_RESULT ) if(TEST_RESULT) - message(FATAL_ERROR "Failed: The output of ${TEST_FILE} (stored in ${TEST_OUT}) does not match ${MATCH_FILE}") + message(FATAL_ERROR "Failed: The output of ${TEST_FILE} (stored in ${TEST_OUT}) does not match ${MATCH_FILE} (${TEST_RESULT})") elseif() message("Passed: The output of ${TEST_FILE} matches ${MATCH_FILE}") endif() diff --git a/cmake/match.py b/cmake/match.py new file mode 100755 index 0000000000..7911748c4d --- /dev/null +++ b/cmake/match.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: MIT + +# check if all lines match in a file +# lines in a match file can contain regex inside of double curly braces {{}} + +import sys +import re + +if len(sys.argv) != 3: + print("Usage: python match.py ") + sys.exit(1) + +input_file = sys.argv[1] +match_file = sys.argv[2] + +with open(input_file, 'r') as input, open(match_file, 'r') as match: + input_lines = input.readlines() + match_lines = match.readlines() + +if len(match_lines) != len(input_lines): + sys.exit(f"Line count doesn't match (is: {len(input_lines)}, expected: {len(match_lines)})") + +for i, match_line in enumerate(match_lines): + # split into parts at {{ }} + match_parts = re.split(r'\{{(.*?)\}}', match_line.strip()) + pattern = "" + for j, part in enumerate(match_parts): + if j % 2 == 0: + pattern += re.escape(part) + else: + pattern += part + + input_line = input_lines[i].strip() + if not re.fullmatch(pattern, input_line): + print(f"Line {i+1} does not match".format(i+1)) + print("is: " + input_line) + print("expected: " + match_line.strip()) + sys.exit(1) diff --git a/examples/hello_world/hello_world.cpp b/examples/hello_world/hello_world.cpp index 2d2049bc84..dc3ffc226c 100644 --- a/examples/hello_world/hello_world.cpp +++ b/examples/hello_world/hello_world.cpp @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) { goto out; } for (auto d : devices) { - ur_device_type_t device_type; + ur_device_type_t device_type = UR_DEVICE_TYPE_ALL; status = urDeviceGetInfo( d, UR_DEVICE_INFO_TYPE, sizeof(ur_device_type_t), static_cast(&device_type), nullptr); diff --git a/source/common/logger/ur_logger.hpp b/source/common/logger/ur_logger.hpp index 34b39379db..0d91580868 100644 --- a/source/common/logger/ur_logger.hpp +++ b/source/common/logger/ur_logger.hpp @@ -12,7 +12,7 @@ namespace logger { -Logger create_logger(std::string logger_name); +Logger create_logger(std::string logger_name, bool skip_prefix = false); inline Logger &get_logger(std::string name = "common") { static Logger logger = create_logger(name); @@ -66,7 +66,7 @@ inline void setFlushLevel(logger::Level level) { /// - flush level: error, meaning that only error messages are guaranteed /// to be printed immediately as they occur /// - output: stderr -inline Logger create_logger(std::string logger_name) { +inline Logger create_logger(std::string logger_name, bool skip_prefix) { std::transform(logger_name.begin(), logger_name.end(), logger_name.begin(), ::toupper); std::stringstream env_var_name; @@ -80,7 +80,8 @@ inline Logger create_logger(std::string logger_name) { env_var_name << "UR_LOG_" << logger_name; auto map = getenv_to_map(env_var_name.str().c_str()); if (!map.has_value()) { - return Logger(std::make_unique(logger_name)); + return Logger( + std::make_unique(logger_name, skip_prefix)); } try { @@ -102,14 +103,16 @@ inline Logger create_logger(std::string logger_name) { values = kv->second; } - sink = values.size() == 2 - ? sink_from_str(logger_name, values[0], values[1]) - : sink_from_str(logger_name, values[0]); + sink = + values.size() == 2 + ? sink_from_str(logger_name, values[0], values[1], skip_prefix) + : sink_from_str(logger_name, values[0], "", skip_prefix); } catch (const std::invalid_argument &e) { std::cerr << "Error when creating a logger instance from environment variable" << e.what(); - return Logger(std::make_unique(logger_name)); + return Logger( + std::make_unique(logger_name, skip_prefix)); } sink->setFlushLevel(flush_level); diff --git a/source/common/logger/ur_logger_details.hpp b/source/common/logger/ur_logger_details.hpp index daf2a1c2fe..263a698154 100644 --- a/source/common/logger/ur_logger_details.hpp +++ b/source/common/logger/ur_logger_details.hpp @@ -48,6 +48,10 @@ class Logger { log(logger::Level::WARN, format, std::forward(args)...); } + template void warn(const char *format, Args &&...args) { + warning(format, std::forward(args)...); + } + template void error(const char *format, Args &&...args) { log(logger::Level::ERR, format, std::forward(args)...); } diff --git a/source/common/logger/ur_sinks.hpp b/source/common/logger/ur_sinks.hpp index a813730ed5..304ab2759f 100644 --- a/source/common/logger/ur_sinks.hpp +++ b/source/common/logger/ur_sinks.hpp @@ -23,8 +23,10 @@ class Sink { public: template void log(logger::Level level, const char *fmt, Args &&...args) { - *ostream << "<" << logger_name << ">"; - *ostream << "[" << level_to_str(level) << "]: "; + if (!skip_prefix) { + *ostream << "<" << logger_name << ">"; + *ostream << "[" << level_to_str(level) << "]: "; + } format(fmt, std::forward(args)...); *ostream << "\n"; if (level >= flush_level) { @@ -40,12 +42,14 @@ class Sink { std::ostream *ostream; logger::Level flush_level; - Sink(std::string logger_name) : logger_name(logger_name) { + Sink(std::string logger_name, bool skip_prefix = false) + : logger_name(logger_name), skip_prefix(skip_prefix) { flush_level = logger::Level::ERR; } private: std::string logger_name; + bool skip_prefix; void format(const char *fmt) { while (*fmt != '\0') { @@ -104,12 +108,14 @@ class Sink { class StdoutSink : public Sink { public: - StdoutSink(std::string logger_name) : Sink(logger_name) { + StdoutSink(std::string logger_name, bool skip_prefix = false) + : Sink(logger_name, skip_prefix) { this->ostream = &std::cout; } - StdoutSink(std::string logger_name, Level flush_lvl) - : StdoutSink(logger_name) { + StdoutSink(std::string logger_name, Level flush_lvl, + bool skip_prefix = false) + : StdoutSink(logger_name, skip_prefix) { this->flush_level = flush_lvl; } @@ -118,12 +124,13 @@ class StdoutSink : public Sink { class StderrSink : public Sink { public: - StderrSink(std::string logger_name) : Sink(logger_name) { + StderrSink(std::string logger_name, bool skip_prefix = false) + : Sink(logger_name, skip_prefix) { this->ostream = &std::cerr; } - StderrSink(std::string logger_name, Level flush_lvl) - : StderrSink(logger_name) { + StderrSink(std::string logger_name, Level flush_lvl, bool skip_prefix) + : StderrSink(logger_name, skip_prefix) { this->flush_level = flush_lvl; } @@ -132,8 +139,9 @@ class StderrSink : public Sink { class FileSink : public Sink { public: - FileSink(std::string logger_name, filesystem::path file_path) - : Sink(logger_name) { + FileSink(std::string logger_name, filesystem::path file_path, + bool skip_prefix = false) + : Sink(logger_name, skip_prefix) { ofstream = std::ofstream(file_path, std::ofstream::out); if (!ofstream.good()) { throw std::invalid_argument( @@ -145,8 +153,8 @@ class FileSink : public Sink { } FileSink(std::string logger_name, filesystem::path file_path, - Level flush_lvl) - : FileSink(logger_name, file_path) { + Level flush_lvl, bool skip_prefix = false) + : FileSink(logger_name, file_path, skip_prefix) { this->flush_level = flush_lvl; } @@ -156,14 +164,15 @@ class FileSink : public Sink { inline std::unique_ptr sink_from_str(std::string logger_name, std::string name, - std::string file_path = "") { + std::string file_path = "", + bool skip_prefix = false) { if (name == "stdout") { - return std::make_unique(logger_name); + return std::make_unique(logger_name, skip_prefix); } else if (name == "stderr") { - return std::make_unique(logger_name); + return std::make_unique(logger_name, skip_prefix); } else if (name == "file" && !file_path.empty()) { - return std::make_unique(logger_name, - file_path.c_str()); + return std::make_unique( + logger_name, file_path.c_str(), skip_prefix); } throw std::invalid_argument( diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index 6d2b49af7b..23077be4c9 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -176,7 +176,8 @@ using EnvVarMap = std::map>; /// Otherwise, optional is set to std::nullopt when the environment variable /// is not set or is empty. /// @throws std::invalid_argument() when the parsed environment variable has wrong format -inline std::optional getenv_to_map(const char *env_var_name) { +inline std::optional getenv_to_map(const char *env_var_name, + bool reject_empty = true) { char main_delim = ';'; char key_value_delim = ':'; char values_delim = ','; @@ -194,13 +195,13 @@ inline std::optional getenv_to_map(const char *env_var_name) { std::string values; std::stringstream kv_ss(key_value); - if (key_value.find(':') == std::string::npos) { + if (reject_empty && key_value.find(':') == std::string::npos) { throw_wrong_format_map(env_var_name); } std::getline(kv_ss, key, key_value_delim); std::getline(kv_ss, values); - if (key.empty() || values.empty() || + if (key.empty() || (reject_empty && values.empty()) || values.find(':') != std::string::npos || map.find(key) != map.end()) { throw_wrong_format_map(env_var_name); @@ -215,9 +216,6 @@ inline std::optional getenv_to_map(const char *env_var_name) { } values_vec.push_back(value); } - if (values_vec.empty()) { - throw_wrong_format_map(env_var_name); - } map[key] = values_vec; } return map; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6d54d7c193..ac2e8317d0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -18,3 +18,6 @@ add_subdirectory(conformance) add_subdirectory(unified_memory_allocation) add_subdirectory(layers) add_subdirectory(unit) +if(UR_BUILD_TOOLS) + add_subdirectory(tools) +endif() diff --git a/test/layers/tracing/hello_world.out.match b/test/layers/tracing/hello_world.out.match index 69746457e4..3df85ca49a 100644 --- a/test/layers/tracing/hello_world.out.match +++ b/test/layers/tracing/hello_world.out.match @@ -7,7 +7,7 @@ function_with_args_begin(3) - urPlatformGet(unimplemented); function_with_args_end(3) - urPlatformGet(...) -> ur_result_t(0); function_with_args_begin(4) - urPlatformGetApiVersion(unimplemented); function_with_args_end(4) - urPlatformGetApiVersion(...) -> ur_result_t(0); -API version: 0.6 +API version: {{0\.[0-9]+}} function_with_args_begin(5) - urDeviceGet(unimplemented); function_with_args_end(5) - urDeviceGet(...) -> ur_result_t(0); function_with_args_begin(6) - urDeviceGet(unimplemented); diff --git a/test/tools/CMakeLists.txt b/test/tools/CMakeLists.txt new file mode 100644 index 0000000000..b9a4afb0d1 --- /dev/null +++ b/test/tools/CMakeLists.txt @@ -0,0 +1,6 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: MIT + +if(UR_ENABLE_TRACING) + add_subdirectory(urtrace) +endif() diff --git a/test/tools/urtrace/CMakeLists.txt b/test/tools/urtrace/CMakeLists.txt new file mode 100644 index 0000000000..3ec38a11c0 --- /dev/null +++ b/test/tools/urtrace/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: MIT + +set(TEST_NAME trace-hello-world) + +function(add_trace_test name CLI_ARGS) + set(TEST_NAME trace_test_${name}) + add_test(NAME ${TEST_NAME} + COMMAND ${CMAKE_COMMAND} + -D TEST_FILE=${Python3_EXECUTABLE} + -D TEST_ARGS="${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/urtrace --stdout ${CLI_ARGS} $" + -D MATCH_FILE=${CMAKE_CURRENT_SOURCE_DIR}/${name}.match + -P ${PROJECT_SOURCE_DIR}/cmake/match.cmake + DEPENDS ur_trace_cli hello_world + ) + set_tests_properties(${TEST_NAME} PROPERTIES LABELS "urtrace") +endfunction() + +add_trace_test(null_hello "--libpath $ --null") +add_trace_test(null_hello_no_args "--libpath $ --null --no-args") +add_trace_test(null_hello_filter_device "--libpath $ --null --filter \".*Device.*\"") +add_trace_test(null_hello_profiling "--libpath $ --null --profiling --time-unit ns") +add_trace_test(null_hello_begin "--libpath $ --null --print-begin") diff --git a/test/tools/urtrace/null_hello.match b/test/tools/urtrace/null_hello.match new file mode 100644 index 0000000000..8c64b8ae15 --- /dev/null +++ b/test/tools/urtrace/null_hello.match @@ -0,0 +1,12 @@ +urInit(.device_flags = 0) -> UR_RESULT_SUCCESS; +Platform initialized. +urPlatformGet(.NumEntries = 1, .phPlatforms = [], .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; +urPlatformGet(.NumEntries = 1, .phPlatforms = [{{.*}}], .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; +urPlatformGetApiVersion(.hDriver = {{.*}}, .pVersion = {{.*}} ({{.*}})) -> UR_RESULT_SUCCESS; +API version: {{.*}} +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = [], .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = [{{.*}}], .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; +urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = {{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; +urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = {{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; +Found a Null Device gpu. +urTearDown(.pParams = nullptr) -> UR_RESULT_SUCCESS; diff --git a/test/tools/urtrace/null_hello_begin.match b/test/tools/urtrace/null_hello_begin.match new file mode 100644 index 0000000000..852912d42b --- /dev/null +++ b/test/tools/urtrace/null_hello_begin.match @@ -0,0 +1,21 @@ +begin(1) - urInit(.device_flags = 0); +end(1) - urInit(.device_flags = 0) -> UR_RESULT_SUCCESS; +Platform initialized. +begin(2) - urPlatformGet(.NumEntries = 1, .phPlatforms = [], .pNumPlatforms = {{.*}} (0)); +end(2) - urPlatformGet(.NumEntries = 1, .phPlatforms = [], .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; +begin(3) - urPlatformGet(.NumEntries = 1, .phPlatforms = [nullptr], .pNumPlatforms = nullptr); +end(3) - urPlatformGet(.NumEntries = 1, .phPlatforms = [{{.*}}], .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; +begin(4) - urPlatformGetApiVersion(.hDriver = {{.*}}, .pVersion = {{.*}} (0.0)); +end(4) - urPlatformGetApiVersion(.hDriver = {{.*}}, .pVersion = {{.*}} (0.6)) -> UR_RESULT_SUCCESS; +API version: {{.*}} +begin(5) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = [], .pNumDevices = {{.*}} (0)); +end(5) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = [], .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; +begin(6) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = [nullptr], .pNumDevices = nullptr); +end(6) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = [{{.*}}], .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; +begin(7) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr); +end(7) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; +begin(8) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr); +end(8) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; +Found a Null Device gpu. +begin(9) - urTearDown(.pParams = nullptr); +end(9) - urTearDown(.pParams = nullptr) -> UR_RESULT_SUCCESS; diff --git a/test/tools/urtrace/null_hello_filter_device.match b/test/tools/urtrace/null_hello_filter_device.match new file mode 100644 index 0000000000..88d2cb689c --- /dev/null +++ b/test/tools/urtrace/null_hello_filter_device.match @@ -0,0 +1,7 @@ +Platform initialized. +API version: {{.*}} +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = [], .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = [{{.*}}], .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; +urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; +urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; +Found a Null Device gpu. diff --git a/test/tools/urtrace/null_hello_no_args.match b/test/tools/urtrace/null_hello_no_args.match new file mode 100644 index 0000000000..b4edbe85f7 --- /dev/null +++ b/test/tools/urtrace/null_hello_no_args.match @@ -0,0 +1,12 @@ +urInit(...) -> UR_RESULT_SUCCESS; +Platform initialized. +urPlatformGet(...) -> UR_RESULT_SUCCESS; +urPlatformGet(...) -> UR_RESULT_SUCCESS; +urPlatformGetApiVersion(...) -> UR_RESULT_SUCCESS; +API version: {{.*}} +urDeviceGet(...) -> UR_RESULT_SUCCESS; +urDeviceGet(...) -> UR_RESULT_SUCCESS; +urDeviceGetInfo(...) -> UR_RESULT_SUCCESS; +urDeviceGetInfo(...) -> UR_RESULT_SUCCESS; +Found a Null Device gpu. +urTearDown(...) -> UR_RESULT_SUCCESS; diff --git a/test/tools/urtrace/null_hello_profiling.match b/test/tools/urtrace/null_hello_profiling.match new file mode 100644 index 0000000000..cc6b4e442c --- /dev/null +++ b/test/tools/urtrace/null_hello_profiling.match @@ -0,0 +1,12 @@ +urInit(.device_flags = 0) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +Platform initialized. +urPlatformGet(.NumEntries = 1, .phPlatforms = [], .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +urPlatformGet(.NumEntries = 1, .phPlatforms = [{{.*}}], .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +urPlatformGetApiVersion(.hDriver = {{.*}}, .pVersion = {{.*}} ({{.*}})) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +API version: {{.*}} +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = [], .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = [{{.*}}], .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = {{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = {{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) +Found a Null Device gpu. +urTearDown(.pParams = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 0000000000..a6c16bf7a8 --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: MIT + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) + +if(UR_ENABLE_TRACING) + add_subdirectory(urtrace) +endif() diff --git a/tools/urtrace/CMakeLists.txt b/tools/urtrace/CMakeLists.txt new file mode 100644 index 0000000000..272e943ccd --- /dev/null +++ b/tools/urtrace/CMakeLists.txt @@ -0,0 +1,26 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: MIT + +set(TARGET_NAME ur_collector) + +add_library(${TARGET_NAME} SHARED + ${CMAKE_CURRENT_SOURCE_DIR}/collector.cpp +) + +target_include_directories(${TARGET_NAME} PRIVATE + ${CMAKE_SOURCE_DIR}/include +) + +target_link_libraries(${TARGET_NAME} PRIVATE xpti ${PROJECT_NAME}::common ${CMAKE_DL_LIBS}) +target_include_directories(${TARGET_NAME} PRIVATE ${xpti_SOURCE_DIR}/include) + +if(MSVC) + target_compile_definitions(${TARGET_NAME} PRIVATE + XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS) +endif() + +set(UR_TRACE_CLI ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/urtrace) + +add_custom_target(ur_trace_cli) +add_custom_command(TARGET ur_trace_cli PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/urtrace ${UR_TRACE_CLI}) +add_dependencies(ur_collector ur_trace_cli) diff --git a/tools/urtrace/README.md b/tools/urtrace/README.md new file mode 100644 index 0000000000..c22e29c968 --- /dev/null +++ b/tools/urtrace/README.md @@ -0,0 +1,35 @@ +# Unified Runtime tracing tool + +urtrace is a command-line tool for tracing and profiling Unified Runtime library +calls in a process. It can be used to inspect the function arguments of each call, +and provides options for filtering, output formatting, and profiling. + +The tool is implemented in Python as a CLI, which uses UR's XPTI-based tracing layer +to register the subscriber to UR function traces. The XPTI subscriber is written in C++ and +is responsible for handling the performance-sensitive aspects of the tool, +such as profiling and function tracing. The necessary arguments from the +Python CLI to the C++ collector are passed through an environment variable. + +The urtrace tool requires unified runtime loader to be built with XPTI support +and that the xptifw is present in the system in a location where urtrace can find +it. The locations where the tool looks for dynamic libraries, such as xptifw, can +be specified with the option `--libpath`. + +See [XPTI framework github repository](https://github.com/intel/llvm/tree/sycl/xptifw) for more information. + +## Examples + +See `urtrace --help` to get detailed information on its usage. +Here are a few examples: + +### Trace all UR calls made by `./myapp --my-arg` +urtrace ./myapp --my-arg + +### Trace and profile only UR functions that match the regex ".*(Device|Platform).*" +urtrace --profiling --filter ".*(Device|Platform).*" ./hello_world + +### Use a custom adapter and also trace function begins +urtrace --adapter libur_adapter_cuda.so --begin ./sycl_app + +### Force load the null adapter and look for it in a custom path +urtrace --null --libpath /opt/custom/ ./foo diff --git a/tools/urtrace/collector.cpp b/tools/urtrace/collector.cpp new file mode 100644 index 0000000000..1fbaa54996 --- /dev/null +++ b/tools/urtrace/collector.cpp @@ -0,0 +1,310 @@ +/* + * Copyright (C) 2023 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + * @file collector.cpp + * + * This file contains the implementation of the UR collector library. The library + * provides instrumentation for tracing function calls and profiling function + * execution time. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "logger/ur_logger.hpp" +#include "ur_api.h" +#include "ur_params.hpp" +#include "xpti/xpti_trace_framework.h" + +constexpr uint16_t TRACE_FN_BEGIN = + static_cast(xpti::trace_point_type_t::function_with_args_begin); +constexpr uint16_t TRACE_FN_END = + static_cast(xpti::trace_point_type_t::function_with_args_end); +constexpr std::string_view UR_STREAM_NAME = "ur"; + +logger::Logger out = logger::create_logger("collector", true); + +constexpr const char *ARGS_ENV = "UR_COLLECTOR_ARGS"; + +enum time_unit { + TIME_UNIT_AUTO, + TIME_UNIT_NS, + TIME_UNIT_US, + TIME_UNIT_MS, + TIME_UNIT_S, + MAX_TIME_UNIT, +}; + +const char *time_unit_str[MAX_TIME_UNIT] = {"auto", "ns", "us", "ms", "s"}; + +std::string time_to_str(std::chrono::nanoseconds dur, enum time_unit unit) { + std::ostringstream ostr; + + switch (unit) { + case TIME_UNIT_AUTO: { + if (dur.count() < 1000) { + return time_to_str(dur, TIME_UNIT_NS); + } + if (dur.count() < 1000 * 1000) { + return time_to_str(dur, TIME_UNIT_US); + } + if (dur.count() < 1000 * 1000 * 1000) { + return time_to_str(dur, TIME_UNIT_MS); + } + return time_to_str(dur, TIME_UNIT_S); + } break; + case TIME_UNIT_NS: { + ostr << dur.count() << "ns"; + } break; + case TIME_UNIT_US: { + std::chrono::duration d = dur; + ostr << d.count() << "us"; + } break; + case TIME_UNIT_MS: { + std::chrono::duration d = dur; + ostr << d.count() << "ms"; + } break; + case TIME_UNIT_S: { + std::chrono::duration> d = dur; + ostr << d.count() << "s"; + } break; + default: { + out.error("invalid time unit {}", unit); + break; + } + } + + return ostr.str(); +} + +/* + * Since this is a library that gets loaded alongside the traced program, it + * can't just accept arguments from the trace CLI tool directly. Instead, the + * arguments are passed through an environment variable. Users shouldn't set + * these directly. + * + * Available arguments (documented in trace cli tool): + * - "print_begin" + * - "profiling" + * - "time_unit:" + * - "filter:" + */ +static class cli_args { + std::optional + arg_with_value(std::string_view name, const std::string arg_name, + const std::vector arg_values) { + if (arg_name != name) { + return std::nullopt; + } + if (arg_values.size() != 1) { + out.warn("{} requires a single argument, skipping...", name); + return std::nullopt; + } + return arg_values.at(0); + } + + public: + cli_args() { + print_begin = false; + profiling = false; + time_unit = TIME_UNIT_AUTO; + no_args = false; + filter = std::nullopt; + filter_str = std::nullopt; + if (auto args = getenv_to_map(ARGS_ENV, false)) { + for (auto [arg_name, arg_values] : *args) { + if (arg_name == "print_begin") { + print_begin = true; + } else if (arg_name == "profiling") { + profiling = true; + } else if (arg_name == "no_args") { + no_args = true; + } else if (auto unit = arg_with_value("time_unit", arg_name, + arg_values)) { + for (int i = 0; i < MAX_TIME_UNIT; ++i) { + if (time_unit_str[i] == unit) { + time_unit = (enum time_unit)i; + break; + } + } + } else if (auto filter_str = + arg_with_value("filter", arg_name, arg_values)) { + try { + filter = filter_str; + } catch (const std::regex_error &err) { + out.warn("invalid filter regex {} {}", *filter_str, + err.what()); + } + } else { + out.warn("unknown {} argument {}.", ARGS_ENV, arg_name); + } + } + } + out.debug("collector args (.print_begin = {}, .profiling = {}, " + ".time_unit = {}, .filter = {})", + print_begin, profiling, time_unit_str[time_unit], + filter_str.has_value() ? *filter_str : "none"); + } + + enum time_unit time_unit; + bool print_begin; + bool profiling; + bool no_args; + std::optional + filter_str; //the filter_str is kept primarly for printing. + std::optional filter; +} cli_args; + +using namespace ur_params; + +typedef std::chrono::high_resolution_clock Clock; + +struct fn_context { + uint64_t instance; + std::optional> start; +}; + +static thread_local std::stack instance_data; + +fn_context *push_instance_data(uint64_t instance) { + instance_data.push(fn_context{instance, std::nullopt}); + return &instance_data.top(); +} + +std::optional pop_instance_data(uint64_t instance) { + if (instance_data.empty()) { + return std::nullopt; + } + auto data = instance_data.top(); + if (data.instance != instance) { + return std::nullopt; + } + instance_data.pop(); + return data; +} + +void trace_begin(const xpti::function_with_args_t *args, uint64_t instance, + fn_context *ctx) { + if (cli_args.print_begin) { + std::ostringstream args_str; + if (cli_args.no_args) { + args_str << "..."; + } else { + ur_params::serializeFunctionParams(args_str, args->function_id, + args->args_data); + } + out.info("begin({}) - {}({});", instance, args->function_name, + args_str.str()); + } + // start the clock as the very last thing this function does to minimize + // tracing overheads + if (cli_args.profiling) { + ctx->start = Clock::now(); + } +} + +void trace_end(const xpti::function_with_args_t *args, uint64_t instance, + fn_context ctx, std::chrono::time_point time) { + std::ostringstream args_str; + if (cli_args.no_args) { + args_str << "..."; + } else { + ur_params::serializeFunctionParams(args_str, args->function_id, + args->args_data); + } + + std::ostringstream prefix_str; + if (cli_args.print_begin) { + prefix_str << "end(" << instance << ") - "; + } + auto result = static_cast(args->ret_data); + + std::ostringstream profile_str; + if (ctx.start && cli_args.profiling) { + auto dur = std::chrono::duration_cast( + time - *ctx.start); + profile_str << " (" << time_to_str(dur, cli_args.time_unit) << ")"; + } + out.info("{}{}({}) -> {};{}", prefix_str.str(), args->function_name, + args_str.str(), *result, profile_str.str()); +} + +XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, + xpti::trace_event_data_t *parent, + xpti::trace_event_data_t *event, + uint64_t instance, const void *user_data) { + // stop the the clock as the very first thing, only used for TRACE_FN_END + auto time = Clock::now(); + auto *args = static_cast(user_data); + + if (auto regex = cli_args.filter) { + if (!std::regex_match(args->function_name, *regex)) { + out.debug("function {} does not match regex filter, skipping...", + args->function_name); + return; + } + } + + if (trace_type == TRACE_FN_BEGIN) { + auto ctx = push_instance_data(instance); + trace_begin(args, instance, ctx); + } else if (trace_type == TRACE_FN_END) { + auto ctx = pop_instance_data(instance); + if (!ctx) { + out.error("Received TRACE_FN_END without corresponding " + "TRACE_FN_BEGIN, instance {}. Skipping...", + instance); + return; + } + trace_end(args, instance, *ctx, time); + } else { + out.warn("unsupported trace type"); + } +} + +/** + * @brief Subscriber initialization function called by the XPTI dispatcher. + * + * Called for every stream. + */ +XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, + unsigned int minor_version, + const char *version_str, + const char *stream_name) { + if (!stream_name || std::string_view(stream_name) != UR_STREAM_NAME) { + out.debug("Found stream: {}. Expected: {}. Skipping...", stream_name, + UR_STREAM_NAME); + return; + } + + if (UR_MAKE_VERSION(major_version, minor_version) != + UR_API_VERSION_CURRENT) { + out.error("Invalid stream version: {}.{}. Expected: {}.{}. Skipping...", + major_version, minor_version, + UR_MAJOR_VERSION(UR_API_VERSION_CURRENT), + UR_MINOR_VERSION(UR_API_VERSION_CURRENT)); + return; + } + + uint8_t stream_id = xptiRegisterStream(stream_name); + + out.debug("Registered stream {} ({}.{}).", stream_name, major_version, + minor_version); + + xptiRegisterCallback(stream_id, TRACE_FN_BEGIN, trace_cb); + xptiRegisterCallback(stream_id, TRACE_FN_END, trace_cb); +} + +/** + * @brief Subscriber finish function called by the XPTI dispatcher. + */ +XPTI_CALLBACK_API void xptiTraceFinish(const char *stream_name) { /* noop */ +} diff --git a/tools/urtrace/urtrace b/tools/urtrace/urtrace new file mode 100755 index 0000000000..86ad39ce2d --- /dev/null +++ b/tools/urtrace/urtrace @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: MIT +import argparse +import subprocess +import os +import sys + +def find_library(paths, name, recursive=False): + for path in paths: + for root, _, files in os.walk(path): + for f in files: + if f == name: + return os.path.abspath(os.path.join(root, name)) + if not recursive: + break + return None + +def is_filename(string): # checks whether a string is just a filename or a path + filename = os.path.basename(string) + return filename == string + +def get_dynamic_library_name(name): + if sys.platform.startswith("linux"): + return "lib{}.so".format(name) + elif sys.platform == "win32": + return "{}.dll".format(name) + else: + sys.exit("Unsupported platform: {}".format(sys.platform)) + +parser = argparse.ArgumentParser( + description = """Unified Runtime tracing tool. + %(prog)s is a program that runs the specified command until its exit, + intercepting and recording all unified runtime library calls from the executed process. + It has support for rich printing of function arguments, and can also perform + rudimentary profiling of unified runtime functions.""", + epilog='''examples: + + %(prog)s ./myapp --myapp-arg + %(prog)s --null --profiling --filter ".*(Device|Platform).*" ./hello_world + %(prog)s --adapter libur_adapter_cuda.so --begin ./sycl_app''', + formatter_class=argparse.RawDescriptionHelpFormatter) +parser.add_argument("command", help="Command to run, including arguments.", nargs=argparse.REMAINDER) +parser.add_argument("--profiling", help="Measure function execution time.", action="store_true") +parser.add_argument("--filter", help="Only trace functions that match the provided regex filter.") +parser.add_argument("--null", help="Force the use of the null adapter.", action="store_true") +parser.add_argument("--adapter", help="Force the use of the provided adapter.", action="append", default=[]) +group = parser.add_mutually_exclusive_group() +group.add_argument("--file", help="Write trace output to a file with the given name instead of stderr.") +group.add_argument("--stdout", help="Write trace output to stdout instead of stderr.", action="store_true") +parser.add_argument("--no-args", help="Don't pretty print traced functions arguments.", action="store_true") +parser.add_argument("--print-begin", help="Print on function begin.", action="store_true") +parser.add_argument("--time-unit", choices=['ns', 'us', 'ms', 's', 'auto'], default='auto', help="Use a specific unit of time for profiling.") +parser.add_argument("--libpath", default=['.', '../lib/', '/lib/', '/usr/local/lib/', '/usr/lib/'], action="append", help="Search path for adapters and xpti libraries.") +parser.add_argument("--recursive", help="Use recursive library search.", action="store_true") +parser.add_argument("--debug", help="Print tool debug information.", action="store_true") +args = parser.parse_args() +config = vars(args) +if args.debug: + print(config) +env = dict() + +env['UR_LOG_COLLECTOR']="level:info;output:stderr;flush:info" +collector_args = "" +if args.print_begin: + collector_args += "print_begin;" +if args.profiling: + collector_args += "profiling;" +if args.time_unit: + collector_args += "time_unit:" + args.time_unit + ";" +if args.filter: + collector_args += "filter:" + args.filter + ";" +if args.no_args: + collector_args += "no_args;" +env['UR_COLLECTOR_ARGS'] = collector_args + +log_collector = "" +if args.debug: + log_collector += "level:debug;" +else: + log_collector += "level:info;" +if args.file: + log_collector += "output:file," + args.file + ";" +elif args.stdout: + log_collector += "output:stdout;" +else: + log_collector += "output:stderr;" +log_collector += "flush:error" +env['UR_LOG_COLLECTOR'] = log_collector + +env['XPTI_TRACE_ENABLE'] = "1" + +xptifw_lib = get_dynamic_library_name("xptifw") +xptifw = find_library(args.libpath, xptifw_lib, args.recursive) +if xptifw is None: + sys.exit("unable to find xptifw library - " + xptifw_lib) +env['XPTI_FRAMEWORK_DISPATCHER'] = xptifw + +collector_lib = get_dynamic_library_name("ur_collector") + +collector = find_library(args.libpath, collector_lib, args.recursive) +if collector is None: + sys.exit("unable to find collector library - " + collector_lib) +env['XPTI_SUBSCRIBERS'] = collector + +force_load = None + +if args.null: + null_lib = get_dynamic_library_name("ur_adapter_null") + null_adapter = find_library(args.libpath, null_lib, args.recursive) + if null_adapter is None: + sys.exit("unable to find the null adapter - " + null_lib) + force_load = null_adapter + +for adapter in args.adapter: + adapter_path = find_library(args.libpath, adapter, args.recursive) if is_filename(adapter) else adapter + if adapter_path is None: + sys.exit("adapter does not specify a valid file " + args.adapter) + if force_load is not None: + force_load += "," + adapter_path + else: + force_load = adapter_path + +if force_load: + env['UR_ADAPTERS_FORCE_LOAD'] = force_load + +if args.debug: + print(env) + +result = subprocess.run(config['command'], env=env) +if args.debug: + print(result) + +exit(result.returncode)