Skip to content
This repository has been archived by the owner on May 19, 2023. It is now read-only.

Commit

Permalink
Merge pull request #155 from brhodes10/feature/tokenizer-rmm
Browse files Browse the repository at this point in the history
[REVIEW] Tokenizer with rmm integration
  • Loading branch information
brhodes10 committed Jun 12, 2020
2 parents e93f3d2 + 83dd0aa commit d72b813
Show file tree
Hide file tree
Showing 26 changed files with 124,217 additions and 123,609 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
- PR #149 Add Versioneer
- PR #151 README and CONTRIBUTING updates
- PR #160 Build script updates
- PR #155 Tokenizer rmm integration

## Bug Fixes
- PR #150 Fix splunk alert workflow test
Expand Down
1 change: 1 addition & 0 deletions conda/environments/clx_dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- pytorch==1.3.1
- torchvision=0.4.2
- cython>=0.29,<0.30
- rmm=0.14.*
- pip
- pip:
- "git+https://github.com/rapidsai/cudatashader.git"
216 changes: 199 additions & 17 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,24 +1,206 @@
cmake_minimum_required(VERSION 3.10)

# Honor CC/CXX from the environment. Compiler selection must happen BEFORE
# project() enables the languages; the original set these afterwards, where
# CMake silently ignores them.
if(DEFINED ENV{CC})
  set(CMAKE_C_COMPILER $ENV{CC})
endif()
if(DEFINED ENV{CXX})
  set(CMAKE_CXX_COMPILER $ENV{CXX})
endif()

project(cuda_tokenizer LANGUAGES CXX CUDA)

###################################################################################################
# - compiler options ------------------------------------------------------------------------------

# Language standards — single authoritative definition (the original set the
# same values twice, before and after project()).
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# NOTE(review): PythonInterp/PythonLibs are deprecated since CMake 3.12;
# prefer find_package(Python3 COMPONENTS Interpreter Development) once the
# minimum CMake version allows.
find_package(PythonInterp 3 REQUIRED)
find_package(PythonLibs 3 REQUIRED)

if(CMAKE_COMPILER_IS_GNUCXX)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O3 -Wno-unused-function")

  # Suppress parentheses warning which causes gmock to fail
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wno-parentheses")

  # Toggle the GCC dual-ABI; OFF adds -D_GLIBCXX_USE_CXX11_ABI=0 to every
  # compile so the library matches pre-C++11-ABI binaries (e.g. conda pkgs).
  option(CMAKE_CXX11_ABI "Enable the GLIBCXX11 ABI" ON)
  if(CMAKE_CXX11_ABI)
    message(STATUS "CUDF: Enabling the GLIBCXX11 ABI")
  else()
    message(STATUS "CUDF: Disabling the GLIBCXX11 ABI")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -D_GLIBCXX_USE_CXX11_ABI=0")
  endif()
endif()

# Split the nvcc version reported by CMake into major/minor components and
# cache the combined "MAJOR.MINOR" value for convenience.
if(CMAKE_CUDA_COMPILER_VERSION)
  if(CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+)\\.([0-9]+)")
    set(CUDA_VERSION_MAJOR "${CMAKE_MATCH_1}")
    set(CUDA_VERSION_MINOR "${CMAKE_MATCH_2}")
  endif()
  set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
  mark_as_advanced(CUDA_VERSION)
endif()

message(STATUS "CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}")
message(STATUS "CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}")
message(STATUS "CUDA_VERSION: ${CUDA_VERSION}")

# Always set this convenience variable
set(CUDA_VERSION_STRING "${CUDA_VERSION}")

# Auto-detect available GPU compute architectures
set(GPU_ARCHS "ALL" CACHE STRING
  "List of GPU architectures (semicolon-separated) to be compiled for. Pass 'ALL' if you want to compile for all supported GPU architectures. Empty string means to auto-detect the GPUs on the current system")

# Empty string -> probe the GPUs present on this machine.
if("${GPU_ARCHS}" STREQUAL "")
  include(cmake/EvalGpuArchs.cmake)
  evaluate_gpu_archs(GPU_ARCHS)
endif()

# 'ALL' -> every architecture the detected CUDA toolkit can target.
if("${GPU_ARCHS}" STREQUAL "ALL")
  set(GPU_ARCHS "60")
  if(CUDA_VERSION_MAJOR GREATER_EQUAL 9)
    list(APPEND GPU_ARCHS "70")
  endif()
  if(CUDA_VERSION_MAJOR GREATER_EQUAL 10)
    list(APPEND GPU_ARCHS "75")
  endif()
endif()
message(STATUS "GPU_ARCHS = ${GPU_ARCHS}")

# One -gencode per requested architecture (SASS for each arch).
foreach(arch ${GPU_ARCHS})
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_${arch},code=sm_${arch}")
endforeach()

# Also embed PTX for the newest requested arch for forward compatibility.
list(GET GPU_ARCHS -1 ptx)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_${ptx},code=compute_${ptx}")

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr")

# set warnings as errors
# TODO: remove `no-maybe-uninitialized` used to suppress warnings in rmm::exec_policy
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Xcompiler -Wall,-Werror,-Wno-error=deprecated-declarations")

option(DISABLE_DEPRECATION_WARNING "Disable warnings generated from deprecated declarations." OFF)
if(DISABLE_DEPRECATION_WARNING)
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wno-deprecated-declarations")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations")
endif()

# Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking
option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF)
if(CMAKE_CUDA_LINEINFO)
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo")
endif()

# Debug builds: device-side debug info (-G) plus a symbol-rich host binary.
if(CMAKE_BUILD_TYPE MATCHES Debug)
  message(STATUS "Building with debugging flags")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G -Xcompiler -rdynamic")
endif()

# To apply RUNPATH to transitive dependencies (this is a temporary solution)
set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--disable-new-dtags")
set(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags")

# Global device-compilation flags.
# NOTE(review): the original appended --expt-relaxed-constexpr twice and left
# a stray merge-leftover `-O3")` line that broke the set() syntax; both fixed.
# The hard-coded sm_70/sm_75 gencodes duplicate the GPU_ARCHS handling above
# and should eventually be removed in favor of it — TODO confirm with owners.
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} \
    --expt-relaxed-constexpr \
    --expt-extended-lambda \
    --default-stream per-thread \
    -gencode arch=compute_70,code=sm_70 \
    -gencode arch=compute_75,code=sm_75 \
    -O3 \
    -lineinfo")

set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/" ${CMAKE_MODULE_PATH})

# Build options
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
option(BUILD_TESTS "Configure CMake to build tests" ON)
option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF)

###################################################################################################
# - RMM -------------------------------------------------------------------------------------------

# Locate the RMM headers and library, guided by the RMM_ROOT environment
# variable (read at configure time only).
find_path(RMM_INCLUDE "rmm"
          HINTS "$ENV{RMM_ROOT}/include")

find_library(RMM_LIBRARY "rmm"
             HINTS "$ENV{RMM_ROOT}/lib" "$ENV{RMM_ROOT}/build")

message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}")
message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")

# Wrap librmm in an imported target. add_library(... IMPORTED) takes no source
# arguments (the original passed ${RMM_LIBRARY} there, which is invalid); the
# file location is supplied via IMPORTED_LOCATION instead.
add_library(rmm SHARED IMPORTED)
if(RMM_INCLUDE AND RMM_LIBRARY)
  set_target_properties(rmm PROPERTIES
    IMPORTED_LOCATION "${RMM_LIBRARY}"
    INTERFACE_INCLUDE_DIRECTORIES "${RMM_INCLUDE}")
endif()

# Kept for targets that do not link the imported `rmm` target directly.
# (The original also called link_directories("${RMM_LIBRARY}") — that variable
# is a library FILE, not a directory, so the call was a no-op at best; removed.)
include_directories("${RMM_INCLUDE}")

###################################################################################################
# - add google benchmark --------------------------------------------------------------------------

if(BUILD_BENCHMARKS)
  include(ConfigureGoogleBenchmark)

  if(GBENCH_FOUND)
    message(STATUS "Google C++ Benchmarking Framework (Google Benchmark) found in ${GBENCH_ROOT}")
    include_directories(${GBENCH_INCLUDE_DIR})
    add_subdirectory(${CMAKE_SOURCE_DIR}/benchmarks)
  else()
    # Message corrected: the original said "automated tests are disabled",
    # copy-pasted from the gtest section.
    message(AUTHOR_WARNING "Google C++ Benchmarking Framework (Google Benchmark) not found: benchmarks are disabled.")
  endif()
endif()

message(STATUS "BENCHMARK_LIST set to: ${BENCHMARK_LIST}")

###################################################################################################
# - library paths ---------------------------------------------------------------------------------
# NOTE(review): the duplicate `-Wall -O3 -Wno-unused-function` append that sat
# here was removed — the identical flags are already added in the compiler
# options section above.

# CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable
# containing the link directories for nvcc. ${RMM_LIBRARY} was dropped from
# this list: it holds a library FILE path, not a directory.
link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}"
                 "${CMAKE_BINARY_DIR}/lib"
                 "${CMAKE_BINARY_DIR}"
                 "${FLATBUFFERS_LIBRARY_DIR}"
                 "${GTEST_LIBRARY_DIR}"
                 "${GBENCH_LIBRARY_DIR}")

###################################################################################################
# - library targets -------------------------------------------------------------------------------

# Core tokenizer library consumed by the tests and benchmarks.
add_library(clx SHARED
  src/hash_utils.cu
  src/basicTokenizer.cu
  src/wordPieceTokenizer.cu
  src/fullTokenizer.cu
  src/main.cu
)
###################################################################################################
# - add gtest -------------------------------------------------------------------------------------

if(BUILD_TESTS)
include(CTest)
include(ConfigureGoogleTest)

if(GTEST_FOUND)
message(STATUS "Google C++ Testing Framework (Google Test) found in ${GTEST_ROOT}")
include_directories(${GTEST_INCLUDE_DIR})
add_subdirectory(${CMAKE_SOURCE_DIR}/tests)
else()
message(AUTHOR_WARNING "Google C++ Testing Framework (Google Test) not found: automated tests are disabled.")
endif(GTEST_FOUND)
endif(BUILD_TESTS)

message(STATUS "CLX_TEST_LIST set to: ${CLX_TEST_LIST}")

###################################################################################################
# - include paths ---------------------------------------------------------------------------------

include_directories("${RMM_INCLUDE}")
###################################################################################################
set(CU_TOK_LIBS
cudart
nvToolsExt
Expand All @@ -27,6 +209,7 @@ set(CU_TOK_LIBS

# Vendored CUB headers plus the project's public include directory.
include_directories(
  ./cub
  ./include
)

add_custom_command(
Expand All @@ -40,26 +223,25 @@ ADD_CUSTOM_TARGET(gen_cuda_header ALL
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/cp_data.h
)

# Standalone tokenizer driver executable.
# NOTE(review): merge residue removed here — an unclosed duplicate
# `add_library(clx ...)` (clx is already defined in the library-targets
# section) and duplicated target_link_libraries calls for clx and tokenizer.
add_executable(tokenizer
  src/main.cu
)

# clx needs NVTX markers and the RMM allocator.
target_link_libraries(clx nvToolsExt rmm)

target_link_libraries(tokenizer ${CU_TOK_LIBS} rmm)

target_include_directories(tokenizer
  PRIVATE
  ./include)

set_target_properties(tokenizer
  PROPERTIES
  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/bin"
)

# Aggregate target that builds every registered benchmark.
add_custom_target(build_benchmarks_clx
  DEPENDS ${BENCHMARK_LIST})

# The generated cp_data.h must exist before the clx sources compile.
add_dependencies(clx gen_cuda_header)

Expand Down
53 changes: 53 additions & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

# NOTE(review): this subdirectory requires CMake 3.14 while the parent file
# only requires 3.10 — confirm the intended project-wide minimum.
project(CLX_BENCHS LANGUAGES C CXX CUDA)

###################################################################################################
# - compiler function -----------------------------------------------------------------------------

# Cache-internal list of benchmark targets; ConfigureBench appends to it and
# the parent reads it for the build_benchmarks_clx aggregate target.
set(BENCHMARK_LIST CACHE INTERNAL "BENCHMARK_LIST")

# ConfigureBench(<name> <src>)
# Creates a benchmark executable from <src>, links it against Google Benchmark
# and clx, places the binary under <build>/gbenchmarks, and appends the target
# name to the global BENCHMARK_LIST cache variable.
function(ConfigureBench CMAKE_BENCH_NAME CMAKE_BENCH_SRC)
  add_executable(${CMAKE_BENCH_NAME} ${CMAKE_BENCH_SRC})
  # Single set_target_properties call (the original made two) and PIC on so
  # the benchmark can link the shared clx library.
  set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gbenchmarks")
  # NOTE(review): prefer Threads::Threads over the bare `pthread` string once
  # find_package(Threads) is available here.
  target_link_libraries(${CMAKE_BENCH_NAME} benchmark benchmark_main pthread clx)
  set(BENCHMARK_LIST ${BENCHMARK_LIST} ${CMAKE_BENCH_NAME} CACHE INTERNAL "BENCHMARK_LIST")
endfunction()

###################################################################################################
# - include paths ---------------------------------------------------------------------------------
# NOTE(review): directory-scoped include_directories leaks into every target
# declared below; prefer target_include_directories inside ConfigureBench.
include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
                    "${CMAKE_BINARY_DIR}/include"
                    "${CMAKE_SOURCE_DIR}/include"
                    "${CMAKE_SOURCE_DIR}"
                    "${CMAKE_SOURCE_DIR}/src"
                    "${GBENCH_INCLUDE_DIR}"
                    "${RMM_INCLUDE}"
                    "${CMAKE_CURRENT_SOURCE_DIR}")



###################################################################################################
# - library paths ---------------------------------------------------------------------------------

# RMM_LIBRARY holds the full path to the library FILE; link_directories wants
# a directory, so derive the containing directory first (the original passed
# the file path itself, which is ignored or wrong).
get_filename_component(RMM_LIBRARY_DIR "${RMM_LIBRARY}" DIRECTORY)

# CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable
# containing the link directories for nvcc.
link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}"
                 "${CMAKE_BINARY_DIR}/lib"
                 "${CMAKE_BINARY_DIR}"
                 "${FLATBUFFERS_LIBRARY_DIR}"
                 "${GTEST_LIBRARY_DIR}"
                 "${GBENCH_LIBRARY_DIR}"
                 "${RMM_LIBRARY_DIR}")

###################################################################################################
# - tokenizer benchmarks --------------------------------------------------------------------------

# Register the tokenizer micro-benchmark via the shared helper; the source
# path is passed directly instead of through an intermediate variable.
ConfigureBench(TOKENIZER_BENCH
               "${CMAKE_CURRENT_SOURCE_DIR}/tokenizer_benchmark.cu")
65 changes: 65 additions & 0 deletions cpp/benchmarks/tokenizer_benchmark.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#include <benchmark/benchmark.h>
#include <for_cython.h>

#include <thrust/device_vector.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>

#include <algorithm>
#include <memory>
#include <string>
#include <vector>

#define MAX_NUM_SENTENCES 101
#define MAX_NUM_CHARS 150000
#define MAX_ROWS_TENSOR 300

// Benchmark: run the file-based tokenizer end-to-end on the checked-in
// sample text with the BERT hash table. Paths are relative to the repo root,
// so the benchmark must be launched from there — TODO confirm intended CWD.
static void BM_cuda_tokenizer_file(benchmark::State& state) {
  const std::string input_file_name = "cpp/benchmarks/tokenizer_benchmark.txt";
  const std::string hash_file = "python/clx/analytics/resources/bert_hash_table.txt";
  const uint32_t max_sequence_length = 64;
  const uint32_t stride = 48;
  const uint32_t do_truncate = 0;
  const uint32_t do_lower = 1;
  // Own the result object: the original allocated with a bare `new` and never
  // freed it, leaking one TokenizerResult per benchmark registration.
  auto result = std::make_unique<TokenizerResult>();
  for (auto _ : state) {
    cuda_tokenizer_file(input_file_name, hash_file, max_sequence_length, stride, do_lower, do_truncate,
                        MAX_NUM_SENTENCES, MAX_NUM_CHARS, MAX_ROWS_TENSOR, result.get());
  }
}
BENCHMARK(BM_cuda_tokenizer_file);

// Copy a list of sentences into one contiguous character buffer, recording
// the starting offset of each sentence. `sentence_offsets` must have room
// for sentences.size() + 1 entries; the final entry holds the total number
// of characters written. The caller guarantees the buffer is large enough.
void flatten_sentences(const std::vector<std::string>& sentences,
                       char* flattened_sentences,
                       uint32_t* sentence_offsets) {

  uint32_t write_pos = 0;
  for (std::size_t idx = 0; idx < sentences.size(); ++idx) {
    const std::string& sentence = sentences[idx];
    sentence_offsets[idx] = write_pos;
    sentence.copy(flattened_sentences + write_pos, sentence.size());
    write_pos += static_cast<uint32_t>(sentence.size());
  }
  sentence_offsets[sentences.size()] = write_pos;
}

// Benchmark: tokenize a single in-memory sentence already resident in device
// memory (the cudf-style entry point that takes raw device pointers).
static void BM_cuda_tokenizer_cudf(benchmark::State& state) {
  // Worst-case-sized device buffer, filled from the host below.
  rmm::device_vector<char> device_sentences{};
  device_sentences.resize(MAX_NUM_CHARS);

  const std::string sentences = "This is a test";
  std::vector<char> char_sentences(sentences.length());
  std::copy(sentences.begin(), sentences.end(), char_sentences.begin());
  device_sentences = char_sentences;

  const std::string hash_file = "python/clx/analytics/resources/bert_hash_table.txt";
  // End offset of the single 14-character test sentence.
  std::vector<uint32_t> offsets{14};
  const uint32_t max_sequence_length = 64;
  const uint32_t stride = 48;
  const uint32_t do_truncate = 0;
  const uint32_t do_lower = 1;
  // Own the result object: the original leaked it with a bare `new`.
  auto result = std::make_unique<TokenizerResult>();
  for (auto _ : state) {
    cuda_tokenizer_cudf(thrust::raw_pointer_cast(device_sentences.data()), offsets.data(), offsets.size(), hash_file,
                        max_sequence_length, stride, do_lower, do_truncate,
                        MAX_NUM_SENTENCES, MAX_NUM_CHARS, MAX_ROWS_TENSOR, result.get());
  }
}
BENCHMARK(BM_cuda_tokenizer_cudf);

BENCHMARK_MAIN();
1 change: 1 addition & 0 deletions cpp/benchmarks/tokenizer_benchmark.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is a test.
Loading

0 comments on commit d72b813

Please sign in to comment.