forked from celeritas-project/celeritas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ScopedProfiling.hh
135 lines (120 loc) · 3.74 KB
/
ScopedProfiling.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
//----------------------------------*-C++-*----------------------------------//
// Copyright 2023-2024 UT-Battelle, LLC, and other Celeritas developers.
// See the top-level COPYRIGHT file for details.
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
//---------------------------------------------------------------------------//
//! \file corecel/sys/ScopedProfiling.hh
//---------------------------------------------------------------------------//
#pragma once
#include <cstdint>
#include <string>
#include <string_view>

#include "celeritas_config.h"
#include "corecel/Macros.hh"
namespace celeritas
{
//---------------------------------------------------------------------------//
/*!
 * Input arguments for the nvtx implementation.
 *
 * \note \c name is a \c std::string_view and therefore does not own its
 * characters: the caller must keep the referenced string alive for as long as
 * this input is in use.
 */
struct ScopedProfilingInput
{
    std::string_view name;  //!< Name of the range (non-owning view)
    std::uint32_t color{};  //!< ARGB
    std::int32_t payload{};  //!< User data
    std::uint32_t category{};  //!< Category, used to group ranges together

    //! Construct from a range name; all other fields are value-initialized.
    // Kept implicit so existing conversions from std::string_view still work.
    ScopedProfilingInput(std::string_view n) noexcept : name{n} {}
};
//---------------------------------------------------------------------------//
/*!
 * RAII class for scoped profiling.
 *
 * Constructing an instance activates a profiling range when
 * \c use_profiling() returns true, and destroying it deactivates that range.
 *
 * Implementations should support a multithreaded context in which each thread
 * has one or more live instances of this class.
 *
 * This is useful for wrapping a specific code fragment in a range for
 * profiling, e.g. ignoring VecGeom instantiation kernels, or profiling a
 * specific action or loop on the CPU.
 *
 * \note The nvtx implementation of \c ScopedProfiling only does something
 * when the application using Celeritas is run through a tool that supports
 * nvtx, e.g. nsight compute with the --nvtx argument. Otherwise, API calls to
 * nvtx are no-ops.
 *
 * \note The AMD roctx implementation requires the roctx library, which may
 * not be available on all systems.
 */
class ScopedProfiling
{
  public:
    //!@{
    //! \name Type aliases
    using Input = ScopedProfilingInput;
    //!@}

#if CELER_USE_DEVICE
    // Whether profiling is enabled (defined out of line)
    static bool use_profiling();
#else
    // Without device support (CELER_USE_DEVICE is false), profiling is never
    // enabled
    static constexpr bool use_profiling() { return false; }
#endif

    // The special members below are declared inline because they are defined
    // in this header.

    // Activate profiling with options
    explicit inline ScopedProfiling(Input const& input);

    // Activate profiling with just a name
    explicit inline ScopedProfiling(std::string_view name);

    // Deactivate profiling
    inline ~ScopedProfiling();

    //!@{
    //! Prevent copying and moving for RAII class
    CELER_DELETE_COPY_MOVE(ScopedProfiling);
    //!@}

  private:
    // Implementation hooks: only called when activated_ is true
    void activate(Input const& input) noexcept;
    void deactivate() noexcept;

    // Result of use_profiling() captured at construction
    bool activated_;
};
//---------------------------------------------------------------------------//
// INLINE DEFINITIONS
//---------------------------------------------------------------------------//
/*!
 * Activate device profiling with options.
 *
 * The result of \c use_profiling() is stored in \c activated_ so that the
 * destructor only deactivates a range that was actually activated here.
 */
ScopedProfiling::ScopedProfiling(Input const& input)
    : activated_{ScopedProfiling::use_profiling()}
{
    if (!activated_)
    {
        // Profiling is disabled: leave this object inert
        return;
    }
    this->activate(input);
}
//---------------------------------------------------------------------------//
/*!
 * Activate device profiling with just a name.
 *
 * Delegates to the \c Input constructor with an input built from \c name
 * alone.
 */
ScopedProfiling::ScopedProfiling(std::string_view name)
    : ScopedProfiling(Input(name))
{
}
//---------------------------------------------------------------------------//
/*!
 * Deactivate a profiling scope.
 *
 * Does nothing when profiling was not enabled at construction.
 */
ScopedProfiling::~ScopedProfiling()
{
    if (!activated_)
    {
        // No range was activated by the constructor
        return;
    }
    this->deactivate();
}
#if !CELER_USE_DEVICE
/*!
 * Stub used when \c CELER_USE_DEVICE is false.
 *
 * In that configuration \c use_profiling() is a constant \c false, so the
 * constructor never reaches this call.
 */
inline void ScopedProfiling::activate(Input const& /* input */) noexcept
{
    CELER_UNREACHABLE;
}
/*!
 * Stub used when \c CELER_USE_DEVICE is false.
 *
 * In that configuration \c use_profiling() is a constant \c false, so the
 * destructor never reaches this call.
 */
inline void ScopedProfiling::deactivate() noexcept
{
    CELER_UNREACHABLE;
}
#endif
//---------------------------------------------------------------------------//
} // namespace celeritas