Update JNI build to use CMAKE_CUDA_ARCHITECTURES (#7425)
This eliminates the `Policy CMP0104 is not set` warning during the JNI build by using `CMAKE_CUDA_ARCHITECTURES` to specify the targeted CUDA architectures. It also removes a lot of code replicated from the cpp build by reusing the `ConfigureCUDA` module added in #7391. The architectures targeted by the build are visible in the `mvn` output, e.g.:
```
     [exec] -- CUDF: Building CUDF for GPU architectures: 60-real;70-real;75
```

This also configures the CPU compiler to use the same flags as the cpp build, which required fixing a number of warnings in the JNI code (e.g. sign-mismatched comparisons, unused variables) since warnings are treated as errors in the build.
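
As an illustration of the kind of change this required, here is a minimal standalone sketch (not taken from the cudf sources) of the sign-mismatch pattern that `-Wall -Werror` turns into a hard error, together with the style of fix applied throughout the JNI code:

```cpp
#include <cstdio>
#include <vector>

// With -Wall -Werror, comparing a signed loop index against an unsigned
// size() result turns the -Wsign-compare warning into a build failure.
void print_all(const std::vector<int> &values) {
  // Before: 'int i' compared against values.size() (std::size_t) -> error.
  // for (int i = 0; i < values.size(); i++) { ... }

  // After: make the index type match what size() returns.
  for (std::size_t i = 0; i < values.size(); i++) {
    std::printf("%d\n", values[i]);
  }
}
```

When a container's `size()` returns a signed type instead (as some of the JNI helper arrays do), the fix goes the other way: the index is declared as `int`/`auto` so both sides of the comparison are signed.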

Authors:
  - Jason Lowe (@jlowe)

Approvers:
  - Robert (Bobby) Evans (@revans2)
  - Alessandro Bellina (@abellina)
  - MithunR (@mythrocks)

URL: #7425
jlowe committed Feb 24, 2021
1 parent b0e5aef commit 3c8b831
Showing 4 changed files with 48 additions and 119 deletions.
118 changes: 29 additions & 89 deletions java/src/main/native/CMakeLists.txt
```diff
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,9 +15,35 @@
 #=============================================================================
 cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
 
+# Use GPU_ARCHS if CMAKE_CUDA_ARCHITECTURES is not defined
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND DEFINED GPU_ARCHS)
+  if(NOT "${GPU_ARCHS}" STREQUAL "ALL")
+    set(CMAKE_CUDA_ARCHITECTURES "${GPU_ARCHS}")
+  endif()
+endif()
+
+# If `CMAKE_CUDA_ARCHITECTURES` is not defined, build for all supported architectures. If
+# `CMAKE_CUDA_ARCHITECTURES` is set to an empty string (""), build for only the current
+# architecture. If `CMAKE_CUDA_ARCHITECTURES` is specified by the user, use user setting.
+
+# This needs to be run before enabling the CUDA language due to the default initialization behavior
+# of `CMAKE_CUDA_ARCHITECTURES`, https://gitlab.kitware.com/cmake/cmake/-/issues/21302
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+  set(CUDF_JNI_BUILD_FOR_ALL_ARCHS TRUE)
+elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
+  unset(CMAKE_CUDA_ARCHITECTURES CACHE)
+  set(CUDF_JNI_BUILD_FOR_DETECTED_ARCHS TRUE)
+endif()
+
 project(CUDF_JNI VERSION 0.7.0 LANGUAGES C CXX CUDA)
 
-set(CUDF_CPP_BUILD_DIR "${PROJECT_SOURCE_DIR}/../../../../cpp/build")
+set(CUDA_DATAFRAME_SOURCE_DIR "${PROJECT_SOURCE_DIR}/../../../../cpp")
+set(CUDF_CPP_BUILD_DIR "${CUDA_DATAFRAME_SOURCE_DIR}/build")
 
+set(CMAKE_MODULE_PATH
+    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/"
+    "${CUDA_DATAFRAME_SOURCE_DIR}/cmake/Modules/"
+    ${CMAKE_MODULE_PATH})
+
 ###################################################################################################
 # - build type ------------------------------------------------------------------------------------
@@ -45,88 +71,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CUDA_STANDARD 14)
 set(CMAKE_CUDA_STANDARD_REQUIRED ON)
 
-if(CMAKE_COMPILER_IS_GNUCXX)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated-declarations")
-endif(CMAKE_COMPILER_IS_GNUCXX)
-
-if(CMAKE_CUDA_COMPILER_VERSION)
-  # Compute the version. from CMAKE_CUDA_COMPILER_VERSION
-  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION})
-  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION})
-  set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
-  mark_as_advanced(CUDA_VERSION)
-endif()
-
-message(STATUS "CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}")
-message(STATUS "CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}")
-message(STATUS "CUDA_VERSION: ${CUDA_VERSION}")
-
-# Always set this convenience variable
-set(CUDA_VERSION_STRING "${CUDA_VERSION}")
-
-# Auto-detect available GPU compute architectures
-set(GPU_ARCHS "ALL" CACHE STRING
-  "List of GPU architectures (semicolon-separated) to be compiled for. Pass 'ALL' if you want to compile for all supported GPU architectures. Empty string means to auto-detect the GPUs on the current system")
-
-if("${GPU_ARCHS}" STREQUAL "")
-  include(cmake/EvalGpuArchs.cmake)
-  evaluate_gpu_archs(GPU_ARCHS)
-endif()
-
-if("${GPU_ARCHS}" STREQUAL "ALL")
-
-  # Check for embedded vs workstation architectures
-  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
-    # This is being built for Linux4Tegra or SBSA ARM64
-    set(GPU_ARCHS "62")
-    if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9))
-      set(GPU_ARCHS "${GPU_ARCHS};72")
-    endif()
-    if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11))
-      set(GPU_ARCHS "${GPU_ARCHS};75;80")
-    endif()
-
-  else()
-    # This is being built for an x86 or x86_64 architecture
-    set(GPU_ARCHS "60")
-    if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9))
-      set(GPU_ARCHS "${GPU_ARCHS};70")
-    endif()
-    if((CUDA_VERSION_MAJOR EQUAL 10) OR (CUDA_VERSION_MAJOR GREATER 10))
-      set(GPU_ARCHS "${GPU_ARCHS};75")
-    endif()
-    if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11))
-      set(GPU_ARCHS "${GPU_ARCHS};80")
-    endif()
-
-  endif()
-
-endif()
-message("GPU_ARCHS = ${GPU_ARCHS}")
-
-foreach(arch ${GPU_ARCHS})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_${arch},code=sm_${arch}")
-endforeach()
-
-
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr")
-
-# set warnings as errors
-# TODO: remove `no-maybe-unitialized` used to suppress warnings in rmm::exec_policy
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror cross-execution-space-call -Xcompiler -Wall,-Werror,-Wno-error=deprecated-declarations")
-
-# Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking
-option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF)
-if (CMAKE_CUDA_LINEINFO)
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo")
-endif(CMAKE_CUDA_LINEINFO)
-
-# Debug options
-if(CMAKE_BUILD_TYPE MATCHES Debug)
-  message(STATUS "Building with debugging flags")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G -Xcompiler -rdynamic")
-endif(CMAKE_BUILD_TYPE MATCHES Debug)
-
 option(BUILD_TESTS "Configure CMake to build tests"
        ON)
 
@@ -146,11 +90,7 @@ endif(CUDA_STATIC_RUNTIME)
 ###################################################################################################
 # - cmake modules ---------------------------------------------------------------------------------
 
-set(CMAKE_MODULE_PATH
-    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/"
-    "${PROJECT_SOURCE_DIR}/../../../../cpp/cmake/Modules/"
-    ${CMAKE_MODULE_PATH})
-
+include(ConfigureCUDA)
 include(FeatureSummary)
 include(CheckIncludeFiles)
 include(CheckLibraryExists)
```
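As a usage note (not part of the commit itself): with this change a specific set of architectures can be requested by defining `CMAKE_CUDA_ARCHITECTURES` when CMake is configured (for example `-DCMAKE_CUDA_ARCHITECTURES="60;70;75"`), leaving it undefined builds for all supported architectures, and setting it to an empty string builds only for the GPUs detected on the current machine. Build scripts that still pass the older `GPU_ARCHS` variable keep working through the mapping added at the top of the file.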
2 changes: 0 additions & 2 deletions java/src/main/native/src/ColumnVectorJni.cpp
```diff
@@ -147,7 +147,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeList(JNIEnv *env, j
     if (children.size() == 0) {
       // special case because cudf::interleave_columns does not support no columns
       auto offsets = cudf::make_column_from_scalar(*zero, row_count + 1);
-      cudf::type_id n_type = static_cast<cudf::type_id>(j_type);
       cudf::data_type n_data_type = cudf::jni::make_data_type(j_type, scale);
       auto empty_col = cudf::make_empty_column(n_data_type);
       ret = cudf::make_lists_column(row_count, std::move(offsets), std::move(empty_col),
@@ -308,7 +307,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeEmptyCudfColumn(JNI
 
   try {
     cudf::jni::auto_set_device(env);
-    cudf::type_id n_type = static_cast<cudf::type_id>(j_type);
     cudf::data_type n_data_type = cudf::jni::make_data_type(j_type, scale);
 
     std::unique_ptr<cudf::column> column(cudf::make_empty_column(n_data_type));
```
8 changes: 4 additions & 4 deletions java/src/main/native/src/ColumnViewJni.cpp
```diff
@@ -303,11 +303,11 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_slice(JNIEnv *env, j
     std::vector<cudf::column_view> result = cudf::slice(*n_column, indices);
     cudf::jni::native_jlongArray n_result(env, result.size());
     std::vector<std::unique_ptr<cudf::column>> column_result(result.size());
-    for (int i = 0; i < result.size(); i++) {
+    for (size_t i = 0; i < result.size(); i++) {
       column_result[i].reset(new cudf::column(result[i]));
       n_result[i] = reinterpret_cast<jlong>(column_result[i].get());
     }
-    for (int i = 0; i < result.size(); i++) {
+    for (size_t i = 0; i < result.size(); i++) {
       column_result[i].release();
     }
     return n_result.get_jArray();
@@ -418,11 +418,11 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_split(JNIEnv *env, j
     std::vector<cudf::column_view> result = cudf::split(*n_column, indices);
     cudf::jni::native_jlongArray n_result(env, result.size());
     std::vector<std::unique_ptr<cudf::column>> column_result(result.size());
-    for (int i = 0; i < result.size(); i++) {
+    for (size_t i = 0; i < result.size(); i++) {
       column_result[i].reset(new cudf::column(result[i]));
       n_result[i] = reinterpret_cast<jlong>(column_result[i].get());
     }
-    for (int i = 0; i < result.size(); i++) {
+    for (size_t i = 0; i < result.size(); i++) {
       column_result[i].release();
     }
     return n_result.get_jArray();
```
39 changes: 15 additions & 24 deletions java/src/main/native/src/TableJni.cpp
```diff
@@ -86,7 +86,7 @@ class jni_writer_data_sink final : public cudf::io::data_sink {
 
   void host_write(void const *data, size_t size) override {
     JNIEnv *env = cudf::jni::get_jni_env(jvm);
-    size_t left_to_copy = size;
+    long left_to_copy = static_cast<long>(size);
     const char *copy_from = static_cast<const char *>(data);
     while (left_to_copy > 0) {
       long buffer_amount_available = current_buffer_len - current_buffer_written;
@@ -111,7 +111,7 @@
 
   void device_write(void const *gpu_data, size_t size, rmm::cuda_stream_view stream) override {
     JNIEnv *env = cudf::jni::get_jni_env(jvm);
-    size_t left_to_copy = size;
+    long left_to_copy = static_cast<long>(size);
     const char *copy_from = static_cast<const char *>(gpu_data);
     while (left_to_copy > 0) {
       long buffer_amount_available = current_buffer_len - current_buffer_written;
@@ -209,7 +209,7 @@ class native_arrow_ipc_writer_handle final {
 
   explicit native_arrow_ipc_writer_handle(const std::vector<std::string> &col_names,
                                           const std::shared_ptr<arrow::io::OutputStream> &sink)
-      : initialized(false), column_names(col_names), sink(sink), file_name("") {}
+      : initialized(false), column_names(col_names), file_name(""), sink(sink) {}
 
   bool initialized;
   std::vector<std::string> column_names;
@@ -541,7 +541,7 @@ convert_table_for_return(JNIEnv *env, std::unique_ptr<cudf::table> &table_result
   for (int i = 0; i < table_cols; i++) {
     outcol_handles[i] = reinterpret_cast<jlong>(ret[i].release());
   }
-  for (int i = 0; i < extra_columns.size(); i++) {
+  for (size_t i = 0; i < extra_columns.size(); i++) {
     outcol_handles[i + table_cols] = reinterpret_cast<jlong>(extra_columns[i].release());
   }
   return outcol_handles.get_jArray();
@@ -553,6 +553,7 @@ jlongArray convert_table_for_return(JNIEnv *env, std::unique_ptr<cudf::table> &t
 }
 
 namespace {
+
 // Check that window parameters are valid.
 bool valid_window_parameters(native_jintArray const &values,
                              native_jpointerArray<cudf::aggregation> const &ops,
@@ -562,14 +563,6 @@ bool valid_window_parameters(native_jintArray const &values,
          values.size() == preceding.size() && values.size() == following.size();
 }
 
-// Check that time-range window parameters are valid.
-bool valid_window_parameters(native_jintArray const &values, native_jintArray const &timestamps,
-                             native_jpointerArray<cudf::aggregation> const &ops,
-                             native_jintArray const &min_periods, native_jintArray const &preceding,
-                             native_jintArray const &following) {
-  return values.size() == timestamps.size() &&
-         valid_window_parameters(values, ops, min_periods, preceding, following);
-}
 } // namespace
 
 } // namespace jni
@@ -927,7 +920,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin(
   table_metadata_with_nullability metadata;
   metadata.column_nullable = nullability;
   metadata.column_names = col_names.as_cpp_vector();
-  for (size_t i = 0; i < meta_keys.size(); ++i) {
+  for (auto i = 0; i < meta_keys.size(); ++i) {
     metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
   }
 
@@ -977,7 +970,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin(
   table_metadata_with_nullability metadata;
   metadata.column_nullable = nullability;
   metadata.column_names = col_names.as_cpp_vector();
-  for (size_t i = 0; i < meta_keys.size(); ++i) {
+  for (int i = 0; i < meta_keys.size(); ++i) {
     metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
   }
   cudf::jni::native_jintArray precisions(env, j_precisions);
@@ -1106,7 +1099,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin(
   table_metadata_with_nullability metadata;
   metadata.column_nullable = nullability;
   metadata.column_names = col_names.as_cpp_vector();
-  for (size_t i = 0; i < meta_keys.size(); ++i) {
+  for (int i = 0; i < meta_keys.size(); ++i) {
     metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
   }
 
@@ -1149,7 +1142,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(
   table_metadata_with_nullability metadata;
   metadata.column_nullable = nullability;
   metadata.column_names = col_names.as_cpp_vector();
-  for (size_t i = 0; i < meta_keys.size(); ++i) {
+  for (int i = 0; i < meta_keys.size(); ++i) {
     metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
   }
 
@@ -1605,7 +1598,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_concatenate(JNIEnv *env,
     cudf::jni::auto_set_device(env);
     cudf::jni::native_jpointerArray<cudf::table_view> tables(env, table_handles);
 
-    long unsigned int num_tables = tables.size();
+    int num_tables = tables.size();
     // There are some issues with table_view and std::vector. We cannot give the
     // vector a size or it will not compile.
     std::vector<cudf::table_view> to_concat;
@@ -1635,7 +1628,6 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition(JNIEnv *env
     cudf::jni::auto_set_device(env);
     cudf::table_view *n_input_table = reinterpret_cast<cudf::table_view *>(input_table);
     cudf::jni::native_jintArray n_columns_to_hash(env, columns_to_hash);
-    int n_number_of_partitions = static_cast<int>(number_of_partitions);
     cudf::jni::native_jintArray n_output_offsets(env, output_offsets);
 
     JNI_ARG_CHECK(env, n_columns_to_hash.size() > 0, "columns_to_hash is zero", NULL);
@@ -1648,7 +1640,7 @@
     std::pair<std::unique_ptr<cudf::table>, std::vector<cudf::size_type>> result =
         cudf::hash_partition(*n_input_table, columns_to_hash_vec, number_of_partitions);
 
-    for (int i = 0; i < result.second.size(); i++) {
+    for (size_t i = 0; i < result.second.size(); i++) {
       n_output_offsets[i] = result.second[i];
     }
 
@@ -1668,12 +1660,11 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_roundRobinPartition(
   try {
     cudf::jni::auto_set_device(env);
    auto n_input_table = reinterpret_cast<cudf::table_view *>(input_table);
-    int n_num_partitions = static_cast<int>(num_partitions);
     cudf::jni::native_jintArray n_output_offsets(env, output_offsets);
 
     auto result = cudf::round_robin_partition(*n_input_table, num_partitions, start_partition);
 
-    for (int i = 0; i < result.second.size(); i++) {
+    for (size_t i = 0; i < result.second.size(); i++) {
       n_output_offsets[i] = result.second[i];
     }
 
@@ -1859,8 +1850,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_bound(JNIEnv *env, jclass, jlo
 
   JNI_ARG_CHECK(env, (column_desc_flags.size() == column_null_orders.size()),
                 "null-order and sort-order size mismatch", 0);
-  uint32_t num_columns = column_null_orders.size();
-  for (int i = 0; i < num_columns; i++) {
+  size_t num_columns = column_null_orders.size();
+  for (size_t i = 0; i < num_columns; i++) {
    column_desc_flags[i] = n_desc_flags[i] ? cudf::order::DESCENDING : cudf::order::ASCENDING;
     column_null_orders[i] =
         n_are_nulls_smallest[i] ? cudf::null_order::BEFORE : cudf::null_order::AFTER;
@@ -1894,7 +1885,7 @@ JNIEXPORT jobjectArray JNICALL Java_ai_rapids_cudf_Table_contiguousSplit(JNIEnv
     std::vector<cudf::packed_table> result = cudf::contiguous_split(*n_table, indices);
     cudf::jni::native_jobjectArray<jobject> n_result =
         cudf::jni::contiguous_table_array(env, result.size());
-    for (int i = 0; i < result.size(); i++) {
+    for (size_t i = 0; i < result.size(); i++) {
       n_result.set(i, cudf::jni::contiguous_table_from(env, result[i].data,
                                                        result[i].table.num_rows()));
     }
```

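One of the warning fixes above concerns member-initializer order rather than sign mismatches: the `native_arrow_ipc_writer_handle` constructor now lists `file_name` before `sink`. A minimal sketch (not the actual cudf class) of why GCC's `-Wreorder`, which `-Werror` turns into an error, forces this ordering:

```cpp
#include <memory>
#include <string>
#include <utility>

// Members are initialized in declaration order, regardless of how the
// constructor's initializer list is written. If the list order differs,
// GCC emits -Wreorder, which -Werror turns into a build failure.
class writer_handle {
public:
  writer_handle(std::string name, std::shared_ptr<int> out)
      // OK: matches the declaration order below (initialized, file_name, sink).
      : initialized(false), file_name(std::move(name)), sink(std::move(out)) {}

  // Writing ": initialized(false), sink(std::move(out)), file_name(std::move(name))"
  // would trigger -Wreorder because sink is declared after file_name.

private:
  bool initialized;
  std::string file_name;
  std::shared_ptr<int> sink;
};
```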