increase clang-tidy coverage to more c10 source files (#102902)
Pull Request resolved: #102902
Approved by: https://github.com/Skylion007
cyyever authored and pytorchmergebot committed Jun 4, 2023
1 parent 992bffe commit 87cbfe9
Showing 16 changed files with 87 additions and 79 deletions.
3 changes: 2 additions & 1 deletion .lintrunner.toml
@@ -226,7 +226,7 @@ command = [
 [[linter]]
 code = 'CLANGTIDY'
 include_patterns = [
-    'c10/core/**/*.cpp',
+    'c10/**/*.cpp',
     'torch/csrc/fx/**/*.cpp',
     'torch/csrc/generic/**/*.cpp',
     'torch/csrc/onnx/**/*.cpp',
@@ -239,6 +239,7 @@ exclude_patterns = [
     # FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed
     # in a follow up PR.
     # that are not easily converted to accepted c++
+    'c10/cuda/**/*.cpp',
    'c10/test/**/*.cpp',
    'torch/csrc/jit/passes/onnx/helper.cpp',
    'torch/csrc/jit/passes/onnx/shape_type_inference.cpp',
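
Note: widening include_patterns from 'c10/core/**/*.cpp' to 'c10/**/*.cpp' puts every C++ source under c10/ in scope for clang-tidy, while the new exclude keeps the CUDA sources out of the linter for now. A sketch of how the globs compose, using illustrative paths:

    c10/util/Logging.cpp        matches 'c10/**/*.cpp'                       -> linted
    c10/cuda/CUDAFunctions.cpp  matches, but excluded by 'c10/cuda/**/*.cpp' -> skipped
    c10/test/core/foo_test.cpp  matches, but excluded by 'c10/test/**/*.cpp' -> skipped
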
1 change: 0 additions & 1 deletion c10/benchmark/intrusive_ptr_benchmark.cpp
@@ -7,7 +7,6 @@
 using c10::intrusive_ptr;
 using c10::intrusive_ptr_target;
 using c10::make_intrusive;
-using c10::weak_intrusive_ptr;
 
 namespace {
 
4 changes: 2 additions & 2 deletions c10/core/TensorImpl.h
@@ -213,7 +213,7 @@ is_non_overlapping_and_dense
  * backend.
  **/
 struct C10_API BackendMeta : intrusive_ptr_target {
-  virtual ~BackendMeta(){};
+  ~BackendMeta() override = default;
   virtual intrusive_ptr<BackendMeta> clone(
       const intrusive_ptr<BackendMeta>& ptr) const {
     return ptr;
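
Note: this destructor fix folds two common clang-tidy suggestions into one line: an overriding destructor is spelled with override rather than a repeated virtual, and an empty body with a stray trailing semicolon becomes = default. A minimal sketch of the pattern, with hypothetical types:

    struct Base {
      virtual ~Base() = default;
    };

    struct Derived : Base {
      // 'override' both documents and compiler-checks that this overrides
      // Base's virtual destructor; '= default' replaces the empty body '{}'.
      ~Derived() override = default;
    };

    int main() {
      Derived d; // destroyed through the defaulted virtual destructor chain
      (void)d;
    }
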
@@ -263,7 +263,7 @@ struct C10_API ExtraMeta {
       c10::optional<std::string> custom_data_ptr_error_msg_ = c10::nullopt)
       : symbolic_shape_meta_(std::move(symbolic_shape_meta)),
         named_tensor_meta_(std::move(named_tensor_meta)),
-        backend_meta_(backend_meta) {}
+        backend_meta_(std::move(backend_meta)) {}
 
   std::unique_ptr<ExtraMeta> clone() const {
     return std::make_unique<ExtraMeta>(*this);
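
Note: backend_meta is already taken by value, so copying it again into the member costs an extra refcount bump on the intrusive pointer; moving from the parameter transfers ownership instead. A minimal sketch of the pass-by-value-then-move idiom, using std::shared_ptr as a stand-in for c10::intrusive_ptr:

    #include <memory>
    #include <utility>

    struct Meta {};

    struct Holder {
      // The caller chooses copy or move into the parameter; the constructor
      // then moves into the member, so at most one refcount bump happens.
      explicit Holder(std::shared_ptr<Meta> meta) : meta_(std::move(meta)) {}
      std::shared_ptr<Meta> meta_;
    };

    int main() {
      auto m = std::make_shared<Meta>();
      Holder h1(m);            // one copy into the parameter, then a move
      Holder h2(std::move(m)); // no copies at all
    }
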
75 changes: 38 additions & 37 deletions c10/cuda/CUDACachingAllocator.cpp
@@ -20,6 +20,7 @@
 #include <cuda_runtime_api.h>
 #include <algorithm>
 #include <bitset>
+#include <cstddef>
 #include <cstdint>
 #include <deque>
 #include <iostream>
@@ -362,14 +363,14 @@ struct ExpandableSegment {
       int device,
       cudaStream_t stream,
       size_t size,
-      const std::vector<int>& peers)
+      std::vector<int> peers)
       : device_(device),
         stream_(stream),
         max_handles_(0),
         // 2MB for small pool, 20MB for large pool
         segment_size_(size),
-        peers_(peers) {
-    cudaDeviceProp prop;
+        peers_(std::move(peers)) {
+    cudaDeviceProp prop{};
     C10_CUDA_CHECK(cudaGetDeviceProperties(&prop, device_));
     // we allocate enough address space for 1 1/8 the total memory on the GPU.
     // This allows for some cases where we have to unmap pages earlier in the
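
Note: the same pass-by-value-then-move idiom is applied to peers here, and cudaDeviceProp prop{} value-initializes the struct before cudaGetDeviceProperties fills it, so no member can ever be read uninitialized. A CUDA-free sketch of the initialization rule (the fill_info API is hypothetical):

    struct Info {
      int major;
      int minor;
    };

    // Hypothetical C-style API that may only partially fill its out-parameter.
    bool fill_info(Info* out) {
      out->major = 12;
      return true; // note: 'minor' is never written
    }

    int main() {
      Info info{}; // value-initialization zeroes every member up front
      fill_info(&info);
      return info.minor; // well-defined 0 rather than indeterminate garbage
    }
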
@@ -390,11 +391,11 @@ struct ExpandableSegment {
       return rangeFromHandles(begin, end);
     }
     while (end > handles_.size()) {
-      handles_.push_back(c10::nullopt);
+      handles_.emplace_back(c10::nullopt);
     }
     for (auto i : c10::irange(begin, end)) {
       TORCH_INTERNAL_ASSERT(!handles_.at(i));
-      CUmemGenericAllocationHandle handle;
+      CUmemGenericAllocationHandle handle = 0;
       CUmemAllocationProp prop = {};
       prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
       prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
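
Note: emplace_back(c10::nullopt) constructs the optional element in place rather than building a temporary and moving it in (clang-tidy's modernize-use-emplace), and handle = 0 gives the allocation handle a defined value before the driver call fills it. A small sketch of the emplace side:

    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::pair<int, std::string>> v;
      v.push_back(std::make_pair(1, "one")); // temporary pair, then a move
      v.emplace_back(2, "two");              // element constructed in place
    }
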
@@ -523,7 +524,7 @@ struct ExpandableSegment {
   }
   int device_;
   cudaStream_t stream_;
-  CUdeviceptr ptr_;
+  CUdeviceptr ptr_{};
   size_t max_handles_;
   size_t segment_size_;
   std::vector<c10::optional<CUmemGenericAllocationHandle>> handles_;
@@ -561,7 +562,7 @@ struct ExpandableSegment {
 // [Checkpointing PrivatePoolState]
 struct BlockState {
   int device = 0;
-  cudaStream_t stream = 0;
+  cudaStream_t stream = nullptr;
   stream_set stream_uses = {};
   size_t size = 0;
   void* ptr = nullptr;
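
Note: cudaStream_t is a pointer typedef (roughly struct CUstream_st*), so its null value is better spelled nullptr than the integer literal 0 (clang-tidy's modernize-use-nullptr). A sketch with a stand-in typedef:

    // Stand-in for CUDA's 'typedef struct CUstream_st* cudaStream_t;'
    struct fake_stream_st;
    using fake_stream_t = fake_stream_st*;

    int main() {
      fake_stream_t stream = nullptr; // preferred over '= 0' for pointer typedefs
      return stream == nullptr ? 0 : 1;
    }
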
@@ -715,8 +716,8 @@ struct PrivatePool {
   PrivatePool()
       : use_count(1),
         cudaMalloc_count(0),
-        large_blocks(/*is_small=*/false, this),
-        small_blocks(/*is_small=*/true, this) {}
+        large_blocks(/*small=*/false, this),
+        small_blocks(/*small=*/true, this) {}
   PrivatePool(const PrivatePool&) = delete;
   PrivatePool(PrivatePool&&) = delete;
   PrivatePool& operator=(const PrivatePool&) = delete;
@@ -759,7 +760,7 @@ SegmentState::SegmentState(Block* head) {
 PrivatePoolState::PrivatePoolState(
     MempoolId_t pool_id,
     const std::vector<Block*>& private_pool_head_blocks)
-    : owner_id(pool_id) {
+    : owner_id(std::move(pool_id)) {
   for (Block* head : private_pool_head_blocks) {
     segments.emplace_back(head);
   }
@@ -891,7 +892,7 @@ void CachingAllocatorConfig::lexArgs(
   size_t env_length = strlen(env);
   for (size_t i = 0; i < env_length; i++) {
     if (env[i] == ',' || env[i] == ':' || env[i] == '[' || env[i] == ']') {
-      if (buf.size() != 0) {
+      if (!buf.empty()) {
         config.emplace_back(buf.begin(), buf.end());
         buf.clear();
       }
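
Note: buf.size() != 0 computes a size only to compare it with zero; empty() states the intent directly and is O(1) on every standard container (clang-tidy's readability-container-size-empty). A small sketch:

    #include <string>
    #include <vector>

    int main() {
      std::vector<char> buf{'a', 'b'};
      std::vector<std::string> config;
      if (!buf.empty()) { // instead of: if (buf.size() != 0)
        config.emplace_back(buf.begin(), buf.end());
        buf.clear();
      }
    }
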
@@ -964,7 +965,7 @@ size_t CachingAllocatorConfig::parseRoundUpPower2Divisions(
     if (config[i].compare("[") == 0) {
       size_t last_index = 0;
       while (++i < config.size() && config[i].compare("]") != 0) {
-        std::string val1 = config[i];
+        const std::string& val1 = config[i];
         size_t val2 = 0;
 
         consumeToken(config, ++i, ':');
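
Note: binding config[i] to a const std::string& aliases the element already stored in the vector, where std::string val1 = config[i] would allocate and copy a string that is only read (clang-tidy's performance-unnecessary-copy-initialization). A minimal sketch:

    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> config{"roundup_power2_divisions", "4"};
      const std::string& val1 = config[0]; // no allocation, no copy
      return val1.empty() ? 1 : 0;
    }
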
@@ -1048,7 +1049,7 @@ size_t CachingAllocatorConfig::parseAllocatorConfig(
     used_cudaMallocAsync = (config[i] == "cudaMallocAsync");
     if (used_cudaMallocAsync) {
 #if CUDA_VERSION >= 11040
-      int version;
+      int version = 0;
       C10_CUDA_CHECK(cudaDriverGetVersion(&version));
       TORCH_CHECK(
           version >= 11040,
@@ -1131,7 +1132,7 @@ static std::string reportProcessMemoryInfo(int device) {
     TORCH_INTERNAL_ASSERT(NVML_SUCCESS == DriverAPI::get()->nvmlInit_v2_());
   });
 
-  cudaDeviceProp prop;
+  cudaDeviceProp prop{};
   C10_CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
 
   char pci_id[80];
@@ -1143,7 +1144,7 @@ static std::string reportProcessMemoryInfo(int device) {
       prop.pciBusID,
       prop.pciDeviceID);
 
-  nvmlDevice_t nvml_device;
+  nvmlDevice_t nvml_device = nullptr;
   TORCH_INTERNAL_ASSERT(
       NVML_SUCCESS ==
       DriverAPI::get()->nvmlDeviceGetHandleByPciBusId_v2_(
@@ -1250,8 +1251,8 @@ class DeviceCachingAllocator {
 
  public:
   DeviceCachingAllocator()
-      : large_blocks(/*is_small=*/false),
-        small_blocks(/*is_small=*/true),
+      : large_blocks(/*small=*/false),
+        small_blocks(/*small=*/true),
         alloc_trace(new std::vector<TraceEntry>()) {
     stats.max_split_size = CachingAllocatorConfig::max_split_size();
     context_recorder_.store(nullptr);
@@ -1280,7 +1281,7 @@ class DeviceCachingAllocator {
       const std::unordered_set<void*>& expected_live_allocations) {
     std::unique_lock<std::recursive_mutex> lock(mutex);
 
-    PrivatePool* pool;
+    PrivatePool* pool = nullptr;
     auto pool_it = graph_pools.find(mempool_id);
     TORCH_CHECK(pool_it != graph_pools.end(), "Could not find pool of id");
     pool = pool_it->second.get();
@@ -1370,8 +1371,8 @@ class DeviceCachingAllocator {
       // alloc_block should have thrown an exception already.
       TORCH_INTERNAL_ASSERT(params.err == cudaErrorMemoryAllocation);
 
-      size_t device_free;
-      size_t device_total;
+      size_t device_free = 0;
+      size_t device_total = 0;
       C10_CUDA_CHECK(cudaMemGetInfo(&device_free, &device_total));
       std::string allowed_info;
 
@@ -1660,8 +1661,8 @@ class DeviceCachingAllocator {
 
   /** set memory fraction to limit maximum allocated memory **/
   void setMemoryFraction(double fraction) {
-    size_t device_free;
-    size_t device_total;
+    size_t device_free = 0;
+    size_t device_total = 0;
     C10_CUDA_CHECK(cudaMemGetInfo(&device_free, &device_total));
     allowed_memory_maximum = static_cast<size_t>(fraction * device_total);
     set_fraction = true;
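
Note: the many `int device = 0;` / `size_t device_free = 0;` changes throughout this file all follow cppcoreguidelines-init-variables: even when a variable is about to be overwritten through an out-parameter, initializing it guarantees a well-defined value if the call errors out early. A CUDA-free sketch (get_mem_info is a hypothetical stand-in for cudaMemGetInfo):

    #include <cstdio>

    // Hypothetical stand-in for cudaMemGetInfo(size_t*, size_t*).
    int get_mem_info(unsigned long long* free_b, unsigned long long* total_b) {
      *free_b = 1024;
      *total_b = 4096;
      return 0; // 0 == success
    }

    int main() {
      unsigned long long device_free = 0;  // defined even if the call below
      unsigned long long device_total = 0; // fails before writing them
      if (get_mem_info(&device_free, &device_total) != 0) {
        return 1; // on failure the values are still well-defined zeros
      }
      std::printf("%llu/%llu\n", device_free, device_total);
    }
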
@@ -1678,7 +1679,7 @@ class DeviceCachingAllocator {
     std::lock_guard<std::recursive_mutex> lock(mutex);
     if (*largest ==
         0) { // make an initial guess if a zero *largest is passed in
-      size_t tmp_bytes;
+      size_t tmp_bytes = 0;
       C10_CUDA_CHECK(cudaMemGetInfo(
           largest, // Use free memory as an optimistic initial guess of *largest
           &tmp_bytes));
@@ -2038,7 +2039,7 @@ class DeviceCachingAllocator {
     });
 
     if (record_history) {
-      record_trace(TraceEntry::SNAPSHOT, 0, total_active, 0, nullptr);
+      record_trace(TraceEntry::SNAPSHOT, 0, total_active, nullptr, nullptr);
     }
     return result;
   }
@@ -2665,7 +2666,7 @@ class DeviceCachingAllocator {
     C10_CUDA_CHECK(cudaGetLastError());
 
     size_t size = p.alloc_size;
-    void* ptr;
+    void* ptr = nullptr;
 
     if (isRetry) {
       stats.num_alloc_retries += 1;
@@ -2977,7 +2978,7 @@ class DeviceCachingAllocator {
   }
 
   void insert_events(Block* block) {
-    int prev_device;
+    int prev_device = 0;
     C10_CUDA_CHECK(c10::cuda::GetDevice(&prev_device));
 
     stream_set streams(std::move(block->stream_uses));
@@ -2997,7 +2998,7 @@ class DeviceCachingAllocator {
   }
 
   void insert_events_deferred_until_no_capture() {
-    if (C10_UNLIKELY(needs_events_deferred_until_no_capture.size() > 0)) {
+    if (C10_UNLIKELY(!needs_events_deferred_until_no_capture.empty())) {
       for (auto* block : needs_events_deferred_until_no_capture) {
         TORCH_INTERNAL_ASSERT(!block->stream_uses.empty());
         insert_events(block);
@@ -3140,7 +3141,7 @@ class NativeCachingAllocator : public CUDAAllocator {
   }
 
   bool initialized() override {
-    return device_allocator.size() > 0;
+    return !device_allocator.empty();
   }
 
   /** allocates a block which is safe to use from the provided stream */
@@ -3196,17 +3197,17 @@ class NativeCachingAllocator : public CUDAAllocator {
       CreateContextFn context_recorder,
       size_t alloc_trace_max_entries,
       bool alloc_trace_record_context) override {
-    int device;
+    int device = 0;
     C10_CUDA_CHECK(c10::cuda::GetDevice(&device));
     device_allocator[device]->recordHistory(
         enabled,
-        std::move(context_recorder),
+        context_recorder,
         alloc_trace_max_entries,
         alloc_trace_record_context);
   }
 
   bool isHistoryEnabled() override {
-    int device;
+    int device = 0;
     C10_CUDA_CHECK(c10::cuda::GetDevice(&device));
     return device_allocator[device]->isHistoryEnabled();
   }
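
Note: dropping std::move(context_recorder) looks like a pessimization but is the opposite of one: if CreateContextFn is trivially copyable (for instance a plain function pointer), std::move changes nothing at runtime, and clang-tidy's performance-move-const-arg flags it as misleading. A sketch assuming a function-pointer callback type:

    using Callback = void (*)(int); // trivially copyable stand-in

    void sink(Callback cb) { (void)cb; }

    void forward(Callback cb) {
      // For a function pointer, sink(std::move(cb)) and sink(cb) generate
      // identical code, so the plain copy is clearer.
      sink(cb);
    }

    int main() { forward(nullptr); }
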
@@ -3220,7 +3221,7 @@
   }
 
   void attachOutOfMemoryObserver(OutOfMemoryObserver observer) override {
-    int device;
+    int device = 0;
     C10_CUDA_CHECK(c10::cuda::GetDevice(&device));
     device_allocator[device]->attachOutOfMemoryObserver(std::move(observer));
   }
@@ -3319,7 +3320,7 @@ class NativeCachingAllocator : public CUDAAllocator {
         OutOfMemoryError,
         size < one_exa_bytes,
         "CUDA out of memory. Tried to allocate more than 1EB memory.");
-    int device;
+    int device = 0;
     C10_CUDA_CHECK(c10::cuda::GetDevice(&device));
     void* r = nullptr;
     if (forceUncachedAllocator()) {
@@ -3396,7 +3397,7 @@ class NativeCachingAllocator : public CUDAAllocator {
     if (nbytes == 0) {
       return nullptr;
     }
-    int device;
+    int device = 0;
     C10_CUDA_CHECK(c10::cuda::GetDevice(&device));
     void* r = nullptr;
     malloc(&r, device, nbytes, cuda::getCurrentCUDAStream(device));
@@ -3407,7 +3408,7 @@ class NativeCachingAllocator : public CUDAAllocator {
     if (nbytes == 0) {
       return nullptr;
     }
-    int device;
+    int device = 0;
     C10_CUDA_CHECK(c10::cuda::GetDevice(&device));
     void* r = nullptr;
     malloc(&r, device, nbytes, stream);
@@ -3484,7 +3485,7 @@ class NativeCachingAllocator : public CUDAAllocator {
     C10_CUDA_CHECK(cudaIpcOpenMemHandle(
         &dev, *ipc_handle, cudaIpcMemLazyEnablePeerAccess));
     // devPtr has to be deleted in same device when created.
-    int curr_device;
+    int curr_device = 0;
     C10_CUDA_CHECK(c10::cuda::GetDevice(&curr_device));
     auto sp =
         std::shared_ptr<void>(dev, [handle, curr_device, this](void* ptr) {
@@ -3590,7 +3591,7 @@ struct BackendStaticInitializer {
   }
 };
 
-std::atomic<CUDAAllocator*> allocator{};
+std::atomic<CUDAAllocator*> allocator;
 BackendStaticInitializer backend_static_initializer;
 
 } // namespace CUDACachingAllocator
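
Note: dropping the braces from the allocator global should be behavior-neutral: objects with static storage duration are zero-initialized before any dynamic initialization runs, so the atomic pointer starts out null either way on mainstream implementations. A sketch of the guarantee being relied on:

    #include <atomic>

    struct Foo {};

    // Namespace-scope objects are zero-initialized during static
    // initialization, so this pointer is null before main() begins.
    std::atomic<Foo*> global_ptr;

    int main() {
      return global_ptr.load() == nullptr ? 0 : 1;
    }
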
4 changes: 2 additions & 2 deletions c10/cuda/CUDAFunctions.cpp
@@ -14,7 +14,7 @@ int32_t driver_version() {
 }
 
 int device_count_impl(bool fail_if_no_driver) {
-  int count;
+  int count = 0;
   auto err = C10_CUDA_ERROR_HANDLED(c10::cuda::GetDeviceCount(&count));
   if (err == cudaSuccess) {
     return count;
@@ -121,7 +121,7 @@ DeviceIndex device_count_ensure_non_zero() {
 }
 
 DeviceIndex current_device() {
-  int cur_device;
+  int cur_device = 0;
   C10_CUDA_CHECK(c10::cuda::GetDevice(&cur_device));
   return static_cast<DeviceIndex>(cur_device);
 }
