diff --git a/libkineto/src/CudaDeviceProperties.cpp b/libkineto/src/CudaDeviceProperties.cpp index eda37faf0..d5e9252ab 100644 --- a/libkineto/src/CudaDeviceProperties.cpp +++ b/libkineto/src/CudaDeviceProperties.cpp @@ -7,10 +7,7 @@ #include "CudaDeviceProperties.h" -#include - #include -#include #include "Logger.h" diff --git a/libkineto/src/CudaDeviceProperties.h b/libkineto/src/CudaDeviceProperties.h index 8156c8b7d..02886b25c 100644 --- a/libkineto/src/CudaDeviceProperties.h +++ b/libkineto/src/CudaDeviceProperties.h @@ -7,10 +7,14 @@ #pragma once +#include #include +#include namespace KINETO_NAMESPACE { +const std::vector& occDeviceProps(); + float kernelOccupancy( uint32_t deviceId, uint16_t registersPerThread, diff --git a/libkineto/src/output_json.cpp b/libkineto/src/output_json.cpp index 50577329e..7e6bb391b 100644 --- a/libkineto/src/output_json.cpp +++ b/libkineto/src/output_json.cpp @@ -38,6 +38,37 @@ void ChromeTraceLogger::handleTraceStart( "schemaVersion": {}, )JSON", kSchemaVersion); +#ifdef HAS_CUPTI + const std::vector& occProps = KINETO_NAMESPACE::occDeviceProps(); + if (occProps.size() > 0) { + std::ostringstream oss; + oss << "["; + for (size_t i = 0; i < occProps.size(); i += 1) { + const cudaOccDeviceProp& occProp = occProps[i]; + if (i > 0) { + oss << ", "; + } + oss << "{"; + oss << "\"computeMajor\": " << occProp.computeMajor << ", "; + oss << "\"computeMinor\": " << occProp.computeMinor << ", "; + oss << "\"maxThreadsPerBlock\": " << occProp.maxThreadsPerBlock << ", "; + oss << "\"maxThreadsPerMultiprocessor\": " << occProp.maxThreadsPerMultiprocessor << ", "; + oss << "\"regsPerBlock\": " << occProp.regsPerBlock << ", "; + oss << "\"regsPerMultiprocessor\": " << occProp.regsPerMultiprocessor << ", "; + oss << "\"warpSize\": " << occProp.warpSize << ", "; + oss << "\"sharedMemPerBlock\": " << occProp.sharedMemPerBlock << ", "; + oss << "\"sharedMemPerMultiprocessor\": " << occProp.sharedMemPerMultiprocessor << ", "; + oss << "\"numSms\": " << occProp.numSms << ", "; + oss << "\"sharedMemPerBlockOptin\": " << occProp.sharedMemPerBlockOptin; + oss << "}"; + } + oss << "]"; + traceOf_ << fmt::format(R"JSON( + "cudaOccDeviceProps": {}, + )JSON", oss.str()); + } +#endif // HAS_CUPTI + if (!metadata.empty()) { traceOf_ << R"JSON( "metadata": {