rapidsai · raydouglass · Oct 12, 2022 · Aug 1, 2022 · Aug 1, 2022 · Aug 1, 2022
diff --git a/cpp/.clang-format → .clang-format b/cpp/.clang-format → .clang-format
@@ -15,7 +15,7 @@ AlignTrailingComments: true
 AllowAllArgumentsOnNextLine: true
 AllowAllConstructorInitializersOnNextLine: true
 AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: true 
+AllowShortBlocksOnASingleLine: true
 AllowShortCaseLabelsOnASingleLine: true
 AllowShortEnumsOnASingleLine: true
 AllowShortFunctionsOnASingleLine: All
@@ -27,7 +27,7 @@ AlwaysBreakAfterDefinitionReturnType: None
 AlwaysBreakAfterReturnType: None
 AlwaysBreakBeforeMultilineStrings: true
 AlwaysBreakTemplateDeclarations: Yes
-BinPackArguments:  false       
+BinPackArguments:  false
 BinPackParameters: false
 BraceWrapping:
   AfterClass:            false

diff --git a/.gitattributes b/.gitattributes
@@ -1,2 +1,5 @@
 python/cudf/cudf/_version.py export-subst
-CHANGELOG.md merge=union
+python/strings_udf/strings_udf/_version.py export-subst
+python/cudf_kafka/cudf_kafka/_version.py export-subst
+python/custreamz/custreamz/_version.py export-subst
+python/dask_cudf/dask_cudf/_version.py export-subst
diff --git a/.gitignore b/.gitignore
@@ -24,17 +24,19 @@ cudf.egg-info/
 python/build
 python/*/build
 python/cudf/cudf-coverage.xml
-python/cudf/*/_lib/**/*\.cpp
+python/cudf/*/_lib/**/*.cpp
 python/cudf/*/_lib/**/*.h
 python/cudf/*/_lib/.nfs*
-python/cudf/*/_cuda/*\.cpp
+python/cudf/*/_cuda/*.cpp
 python/cudf/*.ipynb
 python/cudf/.ipynb_checkpoints
 python/*/record.txt
-python/cudf_kafka/*/_lib/**/*\.cpp
+python/cudf_kafka/*/_lib/**/*.cpp
 python/cudf_kafka/*/_lib/**/*.h
-python/custreamz/*/_lib/**/*\.cpp
+python/custreamz/*/_lib/**/*.cpp
 python/custreamz/*/_lib/**/*.h
+python/strings_udf/strings_udf/_lib/*.cpp
+python/strings_udf/strings_udf/*.ptx
 .Python
 env/
 develop-eggs/

@@ -9,7 +9,6 @@ repos:
                 # project can specify its own first/third-party packages.
                 args: ["--config-root=python/", "--resolve-all-configs"]
                 files: python/.*
-                exclude: (__init__.py|setup.py)$
                 types_or: [python, cython, pyi]
       - repo: https://github.com/psf/black
         rev: 22.3.0
@@ -26,10 +25,15 @@ repos:
                 files: python/.*\.(py|pyx|pxd)$
                 types: [file]
       - repo: https://github.com/pre-commit/mirrors-mypy
-        rev: 'v0.782'
+        rev: 'v0.971'
         hooks:
               - id: mypy
-                args: ["--config-file=setup.cfg", "python/cudf/cudf", "python/dask_cudf/dask_cudf", "python/custreamz/custreamz", "python/cudf_kafka/cudf_kafka"]
+                additional_dependencies: [types-cachetools]
+                args: ["--config-file=setup.cfg",
+                       "python/cudf/cudf",
+                       "python/custreamz/custreamz",
+                       "python/cudf_kafka/cudf_kafka",
+                       "python/dask_cudf/dask_cudf"]
                 pass_filenames: false
       - repo: https://github.com/PyCQA/pydocstyle
         rev: 6.1.1
@@ -40,9 +44,8 @@ repos:
         rev: v11.1.0
         hooks:
               - id: clang-format
-                files: \.(cu|cuh|h|hpp|cpp|inl)$
-                types_or: [file]
-                args: ['-fallback-style=none', '-style=file', '-i']
+                types_or: [c, c++, cuda]
+                args: ["-fallback-style=none", "-style=file", "-i"]
       - repo: local
         hooks:
               - id: no-deprecationwarning
@@ -60,6 +63,8 @@ repos:
                 # of dependencies, so we'll have to update this manually.
                 additional_dependencies:
                   - cmakelang==0.6.13
+                verbose: true
+                require_serial: true
               - id: cmake-lint
                 name: cmake-lint
                 entry: ./cpp/scripts/run-cmake-format.sh cmake-lint
@@ -69,13 +74,14 @@ repos:
                 # of dependencies, so we'll have to update this manually.
                 additional_dependencies:
                   - cmakelang==0.6.13
+                verbose: true
+                require_serial: true
               - id: copyright-check
                 name: copyright-check
-                # This hook's use of Git tools appears to conflict with
-                # existing CI invocations so we don't invoke it during CI runs.
-                stages: [commit]
-                entry: python ./ci/checks/copyright.py --git-modified-only
+                entry: python ./ci/checks/copyright.py --git-modified-only --update-current-year
                 language: python
+                pass_filenames: false
+                additional_dependencies: [gitpython]
               - id: doxygen-check
                 name: doxygen-check
                 entry: ./ci/checks/doxygen.sh
@@ -84,6 +90,18 @@ repos:
                 language: system
                 pass_filenames: false
                 verbose: true
+              - id: headers-recipe-check
+                name: headers-recipe-check
+                entry: ./ci/checks/headers_test.sh
+                files: |
+                  (?x)^(
+                    ^cpp/include/|
+                    ^conda/.*/meta.yaml
+                  )
+                types_or: [file]
+                language: system
+                pass_filenames: false
+                verbose: false
 
 default_language_version:
       python: python3
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,7 @@
+# cuDF 22.10.00 (Date TBD)
+
+Please see https://github.com/rapidsai/cudf/releases/tag/v22.10.00a for the latest changes to this development branch.
+
 # cuDF 22.08.00 (17 Aug 2022)
 
 ## 🚨 Breaking Changes

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -73,7 +73,7 @@ Compilers:
 
 * `gcc` version 9.3+
 * `nvcc` version 11.5+
-* `cmake` version 3.20.1+
+* `cmake` version 3.23.1+
 
 CUDA/GPU:
 
@@ -380,6 +380,13 @@ Now code linters and formatters will be run each time you commit changes.
 
 You can skip these checks with `git commit --no-verify` or with the short version `git commit -n`.
 
+## Developer Guidelines
+
+The [C++ Developer Guide](cpp/docs/DEVELOPER_GUIDE.md) includes details on contributing to libcudf C++ code.
+
+The [Python Developer Guide](https://docs.rapids.ai/api/cudf/stable/developer_guide/index.html) includes details on contributing to cuDF Python code.
+
+
 ## Attribution
 
 Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md

@@ -17,7 +17,7 @@ ARGS=$*
 # script, and that this script resides in the repo dir!
 REPODIR=$(cd $(dirname $0); pwd)
 
-VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --opensource_nvcomp  --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
+VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz strings_udf -v -g -n -l --allgpuarch --disable_nvtx --opensource_nvcomp  --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
 HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
    clean                         - remove all existing build artifacts and configuration (start
                                    over)
@@ -329,7 +329,16 @@ fi
 if buildAll || hasArg cudf; then
 
     cd ${REPODIR}/python/cudf
-    python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
+    python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
+    if [[ ${INSTALL_TARGET} != "" ]]; then
+        python setup.py install --single-version-externally-managed --record=record.txt  -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
+    fi
+fi
+
+if buildAll || hasArg strings_udf; then
+
+    cd ${REPODIR}/python/strings_udf
+    python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
     if [[ ${INSTALL_TARGET} != "" ]]; then
         python setup.py install --single-version-externally-managed --record=record.txt  -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
     fi

@@ -39,6 +39,9 @@ export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
 export INSTALL_DASK_MAIN=0
 
+# Dask version to install when `INSTALL_DASK_MAIN=0`
+export DASK_STABLE_VERSION="2022.9.2"
+
 function remove_libcudf_kernel_cache_dir {
     EXITCODE=$?
     logger "removing kernel cache dir: $LIBCUDF_KERNEL_CACHE_PATH"
@@ -82,8 +85,8 @@ if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
     gpuci_logger "gpuci_mamba_retry update dask"
     gpuci_mamba_retry update dask
 else
-    gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall"
-    gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall
+    gpuci_logger "gpuci_mamba_retry install conda-forge::dask==${DASK_STABLE_VERSION} conda-forge::distributed==${DASK_STABLE_VERSION} conda-forge::dask-core==${DASK_STABLE_VERSION} --force-reinstall"
+    gpuci_mamba_retry install conda-forge::dask==${DASK_STABLE_VERSION} conda-forge::distributed==${DASK_STABLE_VERSION} conda-forge::dask-core==${DASK_STABLE_VERSION} --force-reinstall
 fi
 
 # Install the master version of streamz