Skip to content
This repository has been archived by the owner on Aug 7, 2023. It is now read-only.

Work with newer thrust and libcudacxx for rapidsai #44

Open
wants to merge 6 commits into
base: branch-22.12
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gputreeshap
4 changes: 2 additions & 2 deletions jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <jni.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>

#include "../../../../src/common/device_helpers.cuh"
#include "../../../../src/common/cuda_pinned_allocator.h"
#include "../../../../src/data/array_interface.h"
#include "jvm_utils.h"
#include <xgboost/c_api.h>
Expand Down Expand Up @@ -131,7 +131,7 @@ class DataIteratorProxy {
bool cache_on_host_{true}; // TODO(Bobby): Make this optional.

template <typename T>
using Alloc = thrust::system::cuda::experimental::pinned_allocator<T>;
using Alloc = xgboost::common::cuda::pinned_allocator<T>;
template <typename U>
using HostVector = std::vector<U, Alloc<U>>;

Expand Down
91 changes: 91 additions & 0 deletions src/common/cuda_pinned_allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*!
* Copyright 2022 by XGBoost Contributors
* \file common.h
* \brief cuda pinned allocator for usage with thrust containers
*/

#pragma once

#include <cstddef>
#include <limits>

#include "common.h"

namespace xgboost {
namespace common {
namespace cuda {

// \p pinned_allocator is a CUDA-specific host memory allocator
// that employs \c cudaMallocHost for allocation.
//
// This implementation is ported from the experimental/pinned_allocator
// that Thrust used to provide.
//
// \see https://en.cppreference.com/w/cpp/memory/allocator
template <typename T>
class pinned_allocator;

template <>
class pinned_allocator<void> {
public:
using value_type = void; // NOLINT: The type of the elements in the allocator
using pointer = void*; // NOLINT: The type returned by address() / allocate()
using const_pointer = const void*; // NOLINT: The type returned by address()
using size_type = std::size_t; // NOLINT: The type used for the size of the allocation
using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers

template <typename U>
struct rebind { // NOLINT
using other = pinned_allocator<U>; // NOLINT: The rebound type
};
};


template <typename T>
class pinned_allocator {
public:
using value_type = T; // NOLINT: The type of the elements in the allocator
using pointer = T*; // NOLINT: The type returned by address() / allocate()
using const_pointer = const T*; // NOLINT: The type returned by address()
using reference = T&; // NOLINT: The parameter type for address()
using const_reference = const T&; // NOLINT: The parameter type for address()
using size_type = std::size_t; // NOLINT: The type used for the size of the allocation
using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers

template <typename U>
struct rebind { // NOLINT
using other = pinned_allocator<U>; // NOLINT: The rebound type
};

XGBOOST_DEVICE inline pinned_allocator() {}; // NOLINT: host/device markup ignored on defaulted functions
XGBOOST_DEVICE inline ~pinned_allocator() {} // NOLINT: host/device markup ignored on defaulted functions
XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} // NOLINT: host/device markup ignored on defaulted functions


template <typename U>
XGBOOST_DEVICE inline pinned_allocator(pinned_allocator<U> const&) {} // NOLINT

XGBOOST_DEVICE inline pointer address(reference r) { return &r; } // NOLINT
XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } // NOLINT

inline pointer allocate(size_type cnt, const_pointer = nullptr) { // NOLINT
if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if

pointer result(nullptr);
dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
return result;
}

inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT

inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); } // NOLINT

XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; }

XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const {
return !operator==(x);
}
};
} // namespace cuda
} // namespace common
} // namespace xgboost
4 changes: 2 additions & 2 deletions src/tree/gpu_hist/evaluate_splits.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
*/
#ifndef EVALUATE_SPLITS_CUH_
#define EVALUATE_SPLITS_CUH_
#include <thrust/system/cuda/experimental/pinned_allocator.h>
#include <thrust/tuple.h>
#include <xgboost/span.h>

#include "../../common/categorical.h"
#include "../../common/cuda_pinned_allocator.h"
#include "../split_evaluator.h"
#include "../updater_gpu_common.cuh"
#include "expand_entry.cuh"
Expand Down Expand Up @@ -40,7 +40,7 @@ template <typename GradientSumT>
class GPUHistEvaluator {
using CatST = common::CatBitField::value_type; // categorical storage type
// use pinned memory to stage the categories, used for sort based splits.
using Alloc = thrust::system::cuda::experimental::pinned_allocator<CatST>;
using Alloc = xgboost::common::cuda::pinned_allocator<CatST>;

private:
TreeEvaluator tree_evaluator_;
Expand Down