diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 1d0154aedc7..c854e67fbdf 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -217,7 +217,7 @@ fi
 
 cd "$WORKSPACE/python/cudf"
 gpuci_logger "Python py.test for cuDF"
-py.test -n 6 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term
+py.test -n 6 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term
 
 cd "$WORKSPACE/python/dask_cudf"
 gpuci_logger "Python py.test for dask-cudf"
diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml
index 1568327f88c..5561a573609 100644
--- a/conda/environments/cudf_dev_cuda11.0.yml
+++ b/conda/environments/cudf_dev_cuda11.0.yml
@@ -17,7 +17,7 @@ dependencies:
   - numba>=0.53.1
   - numpy
   - pandas>=1.0,<1.3.0dev0
-  - pyarrow=1.0.1
+  - pyarrow=4.0.1
   - fastavro>=0.22.9
   - notebook>=0.5.0
   - cython>=0.29,<0.30
@@ -44,8 +44,8 @@ dependencies:
   - dask>=2021.6.0
   - distributed>=2021.6.0
   - streamz
+  - arrow-cpp=4.0.1
   - dlpack>=0.5,<0.6.0a0
-  - arrow-cpp=1.0.1
   - arrow-cpp-proc * cuda
   - double-conversion
   - rapidjson
diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml
index 9d520ada253..6c8ae4cb9b0 100644
--- a/conda/environments/cudf_dev_cuda11.2.yml
+++ b/conda/environments/cudf_dev_cuda11.2.yml
@@ -17,7 +17,7 @@ dependencies:
   - numba>=0.53.1
   - numpy
   - pandas>=1.0,<1.3.0dev0
-  - pyarrow=1.0.1
+  - pyarrow=4.0.1
   - fastavro>=0.22.9
   - notebook>=0.5.0
   - cython>=0.29,<0.30
@@ -44,8 +44,8 @@ dependencies:
   - dask>=2021.6.0
   - distributed>=2021.6.0
   - streamz
+  - arrow-cpp=4.0.1
   - dlpack>=0.5,<0.6.0a0
-  - arrow-cpp=1.0.1
   - arrow-cpp-proc * cuda
   - double-conversion
   - rapidjson
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index d1aaf924555..3da7c63857d 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -30,7 +30,7 @@ requirements:
     - setuptools
     - numba >=0.53.1
     - dlpack>=0.5,<0.6.0a0
-    - pyarrow 1.0.1
+    - pyarrow 4.0.1
     - libcudf {{ version }}
     - rmm {{ minor_version }}
     - cudatoolkit {{ cuda_version }}
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 14b94dd2249..150011c32bf 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -37,7 +37,7 @@ requirements:
   host:
     - librmm {{ minor_version }}.*
     - cudatoolkit {{ cuda_version }}.*
-    - arrow-cpp 1.0.1
+    - arrow-cpp 4.0.1
     - arrow-cpp-proc * cuda
     - dlpack>=0.5,<0.6.0a0
   run:
diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml
index f1ec813a17f..6b15890e7c7 100644
--- a/conda/recipes/libcudf_kafka/meta.yaml
+++ b/conda/recipes/libcudf_kafka/meta.yaml
@@ -25,8 +25,8 @@ requirements:
   build:
     - cmake >=3.20.1
   host:
-    - libcudf {{ version }}
-    - librdkafka >=1.5.0,<1.5.3
+    - libcudf {{version}}
+    - librdkafka >=1.6.0,<1.7.0a0
   run:
     - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not
 
diff --git a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake
index 0eee5abd2f3..e15f3f7e16d 100644
--- a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake
+++ b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake
@@ -127,6 +127,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3)
 
 endfunction()
 
-set(CUDF_VERSION_Arrow 1.0.1)
+set(CUDF_VERSION_Arrow 4.0.1)
 
 find_and_configure_arrow(${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3})
diff --git a/python/cudf/cudf/_fuzz_testing/avro.py b/python/cudf/cudf/_fuzz_testing/avro.py
index a07e3acf416..4c167ac627f 100644
--- a/python/cudf/cudf/_fuzz_testing/avro.py
+++ b/python/cudf/cudf/_fuzz_testing/avro.py
@@ -15,7 +15,7 @@
     pandas_to_avro,
     pyarrow_to_pandas,
 )
-from cudf.tests import dataset_generator as dg
+from cudf.testing import dataset_generator as dg
 
 logging.basicConfig(
     format="%(asctime)s %(levelname)-8s %(message)s",
diff --git a/python/cudf/cudf/_fuzz_testing/csv.py b/python/cudf/cudf/_fuzz_testing/csv.py
index 84346ed61ad..0acb9c8a471 100644
--- a/python/cudf/cudf/_fuzz_testing/csv.py
+++ b/python/cudf/cudf/_fuzz_testing/csv.py
@@ -12,7 +12,7 @@
     _generate_rand_meta,
     pyarrow_to_pandas,
 )
-from cudf.tests import dataset_generator as dg
+from cudf.testing import dataset_generator as dg
 from cudf.utils.dtypes import pandas_dtypes_to_cudf_dtypes
 
 logging.basicConfig(
diff --git a/python/cudf/cudf/_fuzz_testing/json.py b/python/cudf/cudf/_fuzz_testing/json.py
index 5ecb27f7665..df9226cf059 100644
--- a/python/cudf/cudf/_fuzz_testing/json.py
+++ b/python/cudf/cudf/_fuzz_testing/json.py
@@ -13,7 +13,7 @@
     _generate_rand_meta,
     pyarrow_to_pandas,
 )
-from cudf.tests import dataset_generator as dg
+from cudf.testing import dataset_generator as dg
 from cudf.utils.dtypes import pandas_dtypes_to_cudf_dtypes
 
 logging.basicConfig(
diff --git a/python/cudf/cudf/_fuzz_testing/orc.py b/python/cudf/cudf/_fuzz_testing/orc.py
index 607294a49c9..2aa01eb3967 100644
--- a/python/cudf/cudf/_fuzz_testing/orc.py
+++ b/python/cudf/cudf/_fuzz_testing/orc.py
@@ -16,7 +16,7 @@
     pandas_to_orc,
     pyarrow_to_pandas,
 )
-from cudf.tests import dataset_generator as dg
+from cudf.testing import dataset_generator as dg
 
 logging.basicConfig(
     format="%(asctime)s %(levelname)-8s %(message)s",
diff --git a/python/cudf/cudf/_fuzz_testing/parquet.py b/python/cudf/cudf/_fuzz_testing/parquet.py
index 8c63b12d972..5b00f96d88d 100644
--- a/python/cudf/cudf/_fuzz_testing/parquet.py
+++ b/python/cudf/cudf/_fuzz_testing/parquet.py
@@ -12,7 +12,7 @@
     _generate_rand_meta,
     pyarrow_to_pandas,
 )
-from cudf.tests import dataset_generator as dg
+from cudf.testing import dataset_generator as dg
 
 logging.basicConfig(
     format="%(asctime)s %(levelname)-8s %(message)s",
diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py
index e6a5d081980..9b6abeb1276 100644
--- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py
+++ b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py
@@ -13,7 +13,7 @@
     compare_content,
     run_test,
 )
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 @pythonfuzz(data_handle=CSVReader)
diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py
index f3da03f447b..2f5e6204f7c 100644
--- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py
+++ b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py
@@ -9,7 +9,7 @@
 from cudf._fuzz_testing.json import JSONReader, JSONWriter
 from cudf._fuzz_testing.main import pythonfuzz
 from cudf._fuzz_testing.utils import ALL_POSSIBLE_VALUES, run_test
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 @pythonfuzz(data_handle=JSONReader)
diff --git a/python/cudf/cudf/_fuzz_testing/utils.py b/python/cudf/cudf/_fuzz_testing/utils.py
index 71b5a35a225..fe9ed4d4934 100644
--- a/python/cudf/cudf/_fuzz_testing/utils.py
+++ b/python/cudf/cudf/_fuzz_testing/utils.py
@@ -9,7 +9,7 @@
 import pyorc
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 from cudf.utils.dtypes import (
     pandas_dtypes_to_cudf_dtypes,
     pyarrow_dtypes_to_pandas_dtypes,
diff --git a/python/cudf/cudf/_lib/gpuarrow.pyx b/python/cudf/cudf/_lib/gpuarrow.pyx
index 6513cd59424..a7da22637b9 100644
--- a/python/cudf/cudf/_lib/gpuarrow.pyx
+++ b/python/cudf/cudf/_lib/gpuarrow.pyx
@@ -15,7 +15,7 @@ from pyarrow.includes.libarrow cimport (
     CRecordBatchStreamReader
 )
 from pyarrow.lib cimport (
-    _CRecordBatchReader,
+    RecordBatchReader,
     Buffer,
     Schema,
     pyarrow_wrap_schema
@@ -23,7 +23,7 @@ from pyarrow.lib cimport (
 import pyarrow as pa
 
 
-cdef class CudaRecordBatchStreamReader(_CRecordBatchReader):
+cdef class CudaRecordBatchStreamReader(RecordBatchReader):
     cdef:
         CIpcReadOptions options
 
diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx
index 13eedb34c18..e5dfb5a5c35 100644
--- a/python/cudf/cudf/_lib/utils.pyx
+++ b/python/cudf/cudf/_lib/utils.pyx
@@ -138,12 +138,16 @@ cpdef generate_pandas_metadata(Table table, index):
             index_descriptors.append(descr)
 
     metadata = pa.pandas_compat.construct_metadata(
-        table,
-        col_names,
-        index_levels,
-        index_descriptors,
-        index,
-        types,
+        columns_to_convert=[
+            col
+            for col in table._columns
+        ],
+        df=table,
+        column_names=col_names,
+        index_levels=index_levels,
+        index_descriptors=index_descriptors,
+        preserve_index=index,
+        types=types,
     )
 
     md_dict = json.loads(metadata[b"pandas"])
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 436f14cf6e3..0901334396a 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5652,11 +5652,12 @@ def to_arrow(self, preserve_index=True):
 
         out = super(DataFrame, data).to_arrow()
         metadata = pa.pandas_compat.construct_metadata(
-            self,
-            out.schema.names,
-            [self.index],
-            index_descr,
-            preserve_index,
+            columns_to_convert=[self[col] for col in self._data.names],
+            df=self,
+            column_names=out.schema.names,
+            index_levels=[self.index],
+            index_descriptors=index_descr,
+            preserve_index=preserve_index,
             types=out.schema.types,
         )
 
diff --git a/python/cudf/cudf/tests/utils.py b/python/cudf/cudf/testing/_utils.py
similarity index 100%
rename from python/cudf/cudf/tests/utils.py
rename to python/cudf/cudf/testing/_utils.py
diff --git a/python/cudf/cudf/tests/dataset_generator.py b/python/cudf/cudf/testing/dataset_generator.py
similarity index 100%
rename from python/cudf/cudf/tests/dataset_generator.py
rename to python/cudf/cudf/testing/dataset_generator.py
diff --git a/python/cudf/cudf/tests/__init__.py b/python/cudf/cudf/tests/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/python/cudf/cudf/tests/test_apply_rows.py b/python/cudf/cudf/tests/test_apply_rows.py
index 0ba80278fca..f025549971f 100644
--- a/python/cudf/cudf/tests/test_apply_rows.py
+++ b/python/cudf/cudf/tests/test_apply_rows.py
@@ -2,7 +2,7 @@
 
 import cudf
 from cudf.core.column import column
-from cudf.tests.utils import assert_eq, gen_rand_series
+from cudf.testing._utils import assert_eq, gen_rand_series
 
 
 def _kernel_multiply(a, b, out):
diff --git a/python/cudf/cudf/tests/test_applymap.py b/python/cudf/cudf/tests/test_applymap.py
index 1f35bc93c78..fa3c88a3551 100644
--- a/python/cudf/cudf/tests/test_applymap.py
+++ b/python/cudf/cudf/tests/test_applymap.py
@@ -7,7 +7,7 @@
 import pytest
 
 from cudf import Series
-from cudf.tests import utils
+from cudf.testing import _utils as utils
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_array_function.py b/python/cudf/cudf/tests/test_array_function.py
index 03f9cf1d7e5..cd4dd28f179 100644
--- a/python/cudf/cudf/tests/test_array_function.py
+++ b/python/cudf/cudf/tests/test_array_function.py
@@ -4,7 +4,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 from cudf.utils.utils import IS_NEP18_ACTIVE
 
 missing_arrfunc_cond = not IS_NEP18_ACTIVE
diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py
index f9e0bb2ce8a..8cfcf4d2b6d 100644
--- a/python/cudf/cudf/tests/test_array_ufunc.py
+++ b/python/cudf/cudf/tests/test_array_ufunc.py
@@ -1,9 +1,10 @@
-import cudf
-import numpy as np
 import cupy as cp
+import numpy as np
 import pandas as pd
 import pytest
-from cudf.tests.utils import assert_eq
+
+import cudf
+from cudf.testing._utils import assert_eq
 
 
 @pytest.fixture
diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
index a52ee937574..48e3b0ec42c 100644
--- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
+++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
@@ -18,7 +18,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 def cudf_from_avro_util(schema, records):
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 090e03c9403..1c97cbb10ff 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -14,7 +14,7 @@
 import cudf
 from cudf.core import Series
 from cudf.core.index import as_index
-from cudf.tests import utils
+from cudf.testing import _utils as utils
 from cudf.utils.dtypes import (
     BOOL_TYPES,
     DATETIME_TYPES,
@@ -1742,12 +1742,6 @@ def test_binops_with_NA_consistent(dtype, op):
         assert result._column.null_count == len(data)
 
 
-def _decimal_series(input, dtype):
-    return cudf.Series(
-        [x if x is None else decimal.Decimal(x) for x in input], dtype=dtype,
-    )
-
-
 @pytest.mark.parametrize(
     "args",
     [
@@ -2080,10 +2074,10 @@ def _decimal_series(input, dtype):
 def test_binops_decimal(args):
     op, lhs, l_dtype, rhs, r_dtype, expect, expect_dtype = args
 
-    a = _decimal_series(lhs, l_dtype)
-    b = _decimal_series(rhs, r_dtype)
+    a = utils._decimal_series(lhs, l_dtype)
+    b = utils._decimal_series(rhs, r_dtype)
     expect = (
-        _decimal_series(expect, expect_dtype)
+        utils._decimal_series(expect, expect_dtype)
         if isinstance(expect_dtype, cudf.Decimal64Dtype)
         else cudf.Series(expect, dtype=expect_dtype)
     )
@@ -2242,7 +2236,7 @@ def test_binops_decimal(args):
         ),
     ],
 )
-@pytest.mark.parametrize("integer_dtype", cudf.tests.utils.INTEGER_TYPES)
+@pytest.mark.parametrize("integer_dtype", utils.INTEGER_TYPES)
 @pytest.mark.parametrize("reflected", [True, False])
 def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected):
     """
@@ -2258,7 +2252,7 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected):
     else:
         op, ldata, ldtype, rdata, _, expected = args
 
-    lhs = _decimal_series(ldata, ldtype)
+    lhs = utils._decimal_series(ldata, ldtype)
     rhs = cudf.Series(rdata, dtype=integer_dtype)
 
     if reflected:
@@ -2746,7 +2740,7 @@ def test_binops_decimal_scalar_compare(args, reflected):
     else:
         op, ldata, ldtype, rdata, _, expected = args
 
-    lhs = _decimal_series(ldata, ldtype)
+    lhs = utils._decimal_series(ldata, ldtype)
     rhs = rdata
 
     if reflected:
diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py
index 7b1aea174c8..6a23f568348 100644
--- a/python/cudf/cudf/tests/test_categorical.py
+++ b/python/cudf/cudf/tests/test_categorical.py
@@ -9,7 +9,11 @@
 
 import cudf
 from cudf.core._compat import PANDAS_GE_110
-from cudf.tests.utils import NUMERIC_TYPES, assert_eq, assert_exceptions_equal
+from cudf.testing._utils import (
+    NUMERIC_TYPES,
+    assert_eq,
+    assert_exceptions_equal,
+)
 
 
 @pytest.fixture
diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py
index 3ac6cc0bb44..f3387b3d27d 100644
--- a/python/cudf/cudf/tests/test_column.py
+++ b/python/cudf/cudf/tests/test_column.py
@@ -8,7 +8,7 @@
 import cudf
 from cudf._lib.transform import mask_to_bools
 from cudf.core.column.column import as_column
-from cudf.tests.utils import assert_eq, assert_exceptions_equal
+from cudf.testing._utils import assert_eq, assert_exceptions_equal
 from cudf.utils import dtypes as dtypeutils
 
 dtypes = sorted(
@@ -140,8 +140,8 @@ def test_column_series_multi_dim(data):
 @pytest.mark.parametrize(
     ("data", "error"),
     [
-        ([1, "1.0", "2", -3], TypeError),
-        ([np.nan, 0, "null", cp.nan], TypeError),
+        ([1, "1.0", "2", -3], pa.lib.ArrowInvalid),
+        ([np.nan, 0, "null", cp.nan], pa.lib.ArrowInvalid),
         (
             [np.int32(4), np.float64(1.5), np.float32(1.290994), np.int8(0)],
             None,
@@ -152,7 +152,7 @@ def test_column_mixed_dtype(data, error):
     if error is None:
         cudf.Series(data)
     else:
-        with pytest.raises(TypeError):
+        with pytest.raises(error):
             cudf.Series(data)
 
 
diff --git a/python/cudf/cudf/tests/test_column_accessor.py b/python/cudf/cudf/tests/test_column_accessor.py
index 86a7927dcac..99d4bdd9910 100644
--- a/python/cudf/cudf/tests/test_column_accessor.py
+++ b/python/cudf/cudf/tests/test_column_accessor.py
@@ -6,7 +6,7 @@
 
 import cudf
 from cudf.core.column_accessor import ColumnAccessor
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 simple_test_data = [
     {},
diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py
index 5c4c121db4d..2578cb13bff 100644
--- a/python/cudf/cudf/tests/test_concat.py
+++ b/python/cudf/cudf/tests/test_concat.py
@@ -1,16 +1,16 @@
 # Copyright (c) 2018-2021, NVIDIA CORPORATION.
 
 import re
+from decimal import Decimal
 
 import numpy as np
 import pandas as pd
 import pytest
-from decimal import Decimal
 
 import cudf as gd
-from cudf.tests.utils import assert_eq, assert_exceptions_equal
-from cudf.utils.dtypes import is_categorical_dtype
 from cudf.core.dtypes import Decimal64Dtype
+from cudf.testing._utils import assert_eq, assert_exceptions_equal
+from cudf.utils.dtypes import is_categorical_dtype
 
 
 def make_frames(index=None, nulls="none"):
diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py
index b669c40022e..b6650600261 100644
--- a/python/cudf/cudf/tests/test_contains.py
+++ b/python/cudf/cudf/tests/test_contains.py
@@ -6,7 +6,7 @@
 
 from cudf import Series
 from cudf.core.index import RangeIndex, as_index
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
     TIMEDELTA_TYPES,
diff --git a/python/cudf/cudf/tests/test_copying.py b/python/cudf/cudf/tests/test_copying.py
index ed6a1169a2a..0965b5298a4 100644
--- a/python/cudf/cudf/tests/test_copying.py
+++ b/python/cudf/cudf/tests/test_copying.py
@@ -6,7 +6,7 @@
 
 import cudf
 from cudf.core import Series
-from cudf.tests.utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq
+from cudf.testing._utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq
 
 
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES + OTHER_TYPES)
diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index 925369048cb..c19fde8b5d6 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -14,7 +14,7 @@
 
 import cudf
 from cudf import read_csv
-from cudf.tests.utils import assert_eq, assert_exceptions_equal
+from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
 
 def make_numeric_dataframe(nrows, dtype):
diff --git a/python/cudf/cudf/tests/test_cuda_apply.py b/python/cudf/cudf/tests/test_cuda_apply.py
index fa880da6804..2604030097b 100644
--- a/python/cudf/cudf/tests/test_cuda_apply.py
+++ b/python/cudf/cudf/tests/test_cuda_apply.py
@@ -9,7 +9,7 @@
 from numba import cuda
 
 from cudf import DataFrame
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 @pytest.mark.parametrize("nelem", [1, 2, 64, 128, 129])
diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py
index 42e5ab38f50..ecf961f133b 100644
--- a/python/cudf/cudf/tests/test_cuda_array_interface.py
+++ b/python/cudf/cudf/tests/test_cuda_array_interface.py
@@ -10,7 +10,7 @@
 from numba import cuda
 
 import cudf
-from cudf.tests.utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq
+from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq
 
 
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES)
@@ -171,6 +171,9 @@ def test_column_from_ephemeral_cupy_try_lose_reference():
 
 def test_cuda_array_interface_pytorch():
     torch = pytest.importorskip("torch")
+    if not torch.cuda.is_available():
+        pytest.skip("need gpu version of pytorch to be installed")
+
     series = cudf.Series([1, -1, 10, -56])
     tensor = torch.tensor(series)
     got = cudf.Series(tensor)
diff --git a/python/cudf/cudf/tests/test_custom_accessor.py b/python/cudf/cudf/tests/test_custom_accessor.py
index d72b5875677..16e5b345ce2 100644
--- a/python/cudf/cudf/tests/test_custom_accessor.py
+++ b/python/cudf/cudf/tests/test_custom_accessor.py
@@ -2,9 +2,9 @@
 
 import pandas as pd
 import pytest
-import cudf as gd
 
-from cudf.tests.utils import assert_eq
+import cudf as gd
+from cudf.testing._utils import assert_eq
 
 
 @gd.api.extensions.register_dataframe_accessor("point")
diff --git a/python/cudf/cudf/tests/test_cut.py b/python/cudf/cudf/tests/test_cut.py
index 926826ac188..710df78e36b 100644
--- a/python/cudf/cudf/tests/test_cut.py
+++ b/python/cudf/cudf/tests/test_cut.py
@@ -4,11 +4,12 @@
 Test related to Cut
 """
 
-import pandas as pd
 import numpy as np
-from cudf.core.cut import cut
+import pandas as pd
 import pytest
-from cudf.tests.utils import assert_eq
+
+from cudf.core.cut import cut
+from cudf.testing._utils import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 853cfe2e88e..dcd85f0517e 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -20,8 +20,8 @@
 import cudf
 from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_120
 from cudf.core.column import column
-from cudf.tests import utils
-from cudf.tests.utils import (
+from cudf.testing import _utils as utils
+from cudf.testing._utils import (
     ALL_TYPES,
     DATETIME_TYPES,
     NUMERIC_TYPES,
diff --git a/python/cudf/cudf/tests/test_dataframe_copy.py b/python/cudf/cudf/tests/test_dataframe_copy.py
index 35788e660ea..5b258c760b3 100644
--- a/python/cudf/cudf/tests/test_dataframe_copy.py
+++ b/python/cudf/cudf/tests/test_dataframe_copy.py
@@ -7,7 +7,7 @@
 from numba import cuda
 
 from cudf.core.dataframe import DataFrame
-from cudf.tests.utils import ALL_TYPES, assert_eq
+from cudf.testing._utils import ALL_TYPES, assert_eq
 
 """
 DataFrame copy expectations
diff --git a/python/cudf/cudf/tests/test_datasets.py b/python/cudf/cudf/tests/test_datasets.py
index a603a6b4658..b7bc89f008d 100644
--- a/python/cudf/cudf/tests/test_datasets.py
+++ b/python/cudf/cudf/tests/test_datasets.py
@@ -1,7 +1,7 @@
 import numpy as np
 
 import cudf as gd
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 def test_dataset_timeseries():
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index 647ff5250ba..653ee8389fa 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -14,7 +14,7 @@
 import cudf
 from cudf.core import DataFrame, Series
 from cudf.core.index import DatetimeIndex
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
     assert_eq,
diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py
index bf1845b9315..7faf087769d 100644
--- a/python/cudf/cudf/tests/test_decimal.py
+++ b/python/cudf/cudf/tests/test_decimal.py
@@ -10,7 +10,7 @@
 import cudf
 from cudf.core.column import DecimalColumn, NumericalColumn
 from cudf.core.dtypes import Decimal64Dtype
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     FLOAT_TYPES,
     INTEGER_TYPES,
     NUMERIC_TYPES,
diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/test_dlpack.py
index b8175d05137..4b2fca0d12d 100644
--- a/python/cudf/cudf/tests/test_dlpack.py
+++ b/python/cudf/cudf/tests/test_dlpack.py
@@ -8,7 +8,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 nelems = [0, 3, 10]
 dtype = [np.uint16, np.int32, np.float64]
diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py
index d01627309d6..e1d0c38c760 100644
--- a/python/cudf/cudf/tests/test_dropna.py
+++ b/python/cudf/cudf/tests/test_dropna.py
@@ -5,7 +5,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
index a5895caf49f..41d7f5d215e 100644
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ b/python/cudf/cudf/tests/test_dtypes.py
@@ -14,7 +14,7 @@
     ListDtype,
     StructDtype,
 )
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 from cudf.utils.dtypes import np_to_pa_dtype
 
 
diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py
index f721b7a28e5..f464ac1a6c2 100644
--- a/python/cudf/cudf/tests/test_duplicates.py
+++ b/python/cudf/cudf/tests/test_duplicates.py
@@ -9,7 +9,7 @@
 
 import cudf
 from cudf import concat
-from cudf.tests.utils import assert_eq, assert_exceptions_equal
+from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
 # TODO: PANDAS 1.0 support
 # Revisit drop_duplicates() tests to update parameters like ignore_index.
diff --git a/python/cudf/cudf/tests/test_factorize.py b/python/cudf/cudf/tests/test_factorize.py
index 48ffef4a11c..3df0031745e 100644
--- a/python/cudf/cudf/tests/test_factorize.py
+++ b/python/cudf/cudf/tests/test_factorize.py
@@ -7,7 +7,7 @@
 
 import cudf
 from cudf.core import DataFrame, Index
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 @pytest.mark.parametrize("ncats,nelem", [(2, 2), (2, 10), (10, 100)])
diff --git a/python/cudf/cudf/tests/test_feather.py b/python/cudf/cudf/tests/test_feather.py
index 525b88fc7ff..6c83ee3c458 100644
--- a/python/cudf/cudf/tests/test_feather.py
+++ b/python/cudf/cudf/tests/test_feather.py
@@ -10,7 +10,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import NUMERIC_TYPES, assert_eq
+from cudf.testing._utils import NUMERIC_TYPES, assert_eq
 
 if LooseVersion(pd.__version__) < LooseVersion("0.24"):
     try:
diff --git a/python/cudf/cudf/tests/test_fill.py b/python/cudf/cudf/tests/test_fill.py
index 83d15b36e64..efbe2834486 100644
--- a/python/cudf/cudf/tests/test_fill.py
+++ b/python/cudf/cudf/tests/test_fill.py
@@ -2,7 +2,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py
index 5d287a57df8..99d79e41520 100644
--- a/python/cudf/cudf/tests/test_gcs.py
+++ b/python/cudf/cudf/tests/test_gcs.py
@@ -10,7 +10,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 gcsfs = pytest.importorskip("gcsfs")
 
diff --git a/python/cudf/cudf/tests/test_gpu_arrow_parser.py b/python/cudf/cudf/tests/test_gpu_arrow_parser.py
index e3c8e69695d..a088ae9f923 100644
--- a/python/cudf/cudf/tests/test_gpu_arrow_parser.py
+++ b/python/cudf/cudf/tests/test_gpu_arrow_parser.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2018, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+
 import logging
 
 import numpy as np
@@ -8,7 +9,7 @@
 
 import cudf
 from cudf.comm.gpuarrow import GpuArrowReader
-from cudf.tests.utils import INTEGER_TYPES
+from cudf.testing._utils import INTEGER_TYPES
 
 
 def make_gpu_parse_arrow_data_batch():
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index f346edb4304..e423a64fe4d 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -15,14 +15,14 @@
 import cudf
 from cudf.core import DataFrame, Series
 from cudf.core._compat import PANDAS_GE_110
-from cudf.tests.dataset_generator import rand_dataframe
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     DATETIME_TYPES,
     SIGNED_TYPES,
     TIMEDELTA_TYPES,
     assert_eq,
     assert_exceptions_equal,
 )
+from cudf.testing.dataset_generator import rand_dataframe
 
 _now = np.datetime64("now")
 _tomorrow = _now + np.timedelta64(1, "D")
diff --git a/python/cudf/cudf/tests/test_hdf.py b/python/cudf/cudf/tests/test_hdf.py
index f908d5f51f5..1bf91a52c2f 100644
--- a/python/cudf/cudf/tests/test_hdf.py
+++ b/python/cudf/cudf/tests/test_hdf.py
@@ -8,7 +8,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq
+from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq
 
 try:
     import tables  # noqa F401
diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/test_hdfs.py
index e3867c620fe..24554f113bb 100644
--- a/python/cudf/cudf/tests/test_hdfs.py
+++ b/python/cudf/cudf/tests/test_hdfs.py
@@ -11,7 +11,7 @@
 from pyarrow import orc as orc
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 if not os.environ.get("RUN_HDFS_TESTS"):
     pytestmark = pytest.mark.skip("Env not configured to run HDFS tests")
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index 0d3380343f4..3e35225e3b0 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -21,7 +21,7 @@
     RangeIndex,
     as_index,
 )
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     FLOAT_TYPES,
     NUMERIC_TYPES,
     OTHER_TYPES,
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 1d34f7636da..58d39ff35a6 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -9,8 +9,12 @@
 
 import cudf
 from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_120
-from cudf.tests import utils
-from cudf.tests.utils import INTEGER_TYPES, assert_eq, assert_exceptions_equal
+from cudf.testing import _utils as utils
+from cudf.testing._utils import (
+    INTEGER_TYPES,
+    assert_eq,
+    assert_exceptions_equal,
+)
 
 index_dtypes = INTEGER_TYPES
 
diff --git a/python/cudf/cudf/tests/test_interval.py b/python/cudf/cudf/tests/test_interval.py
index c7eafedd409..fc193441113 100644
--- a/python/cudf/cudf/tests/test_interval.py
+++ b/python/cudf/cudf/tests/test_interval.py
@@ -4,7 +4,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index 2d8f451abb9..7b56f864272 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -7,7 +7,7 @@
 import cudf
 from cudf.core._compat import PANDAS_GE_120
 from cudf.core.dtypes import CategoricalDtype, Decimal64Dtype
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     INTEGER_TYPES,
     NUMERIC_TYPES,
     assert_eq,
@@ -96,7 +96,7 @@ def assert_join_results_equal(expect, got, how, **kwargs):
             got.sort_values(got.columns.to_list()).reset_index(drop=True),
             **kwargs,
         )
-    elif isinstance(expect, (pd.Index, cudf.BaseIndex)):
+    elif isinstance(expect, (pd.Index, cudf.Index)):
         return assert_eq(expect.sort_values(), got.sort_values(), **kwargs)
     else:
         raise ValueError(f"Not a join result: {type(expect).__name__}")
@@ -1922,3 +1922,193 @@ def test_join_merge_invalid_keys(on, how):
     with pytest.raises(KeyError):
         pd_left.merge(pd_right, on=on)
         gd_left.merge(gd_right, on=on)
+
+
+@pytest.mark.parametrize(
+    "str_data",
+    [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]],
+)
+@pytest.mark.parametrize("num_keys", [1, 2, 3])
+@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"])
+def test_string_join_key(str_data, num_keys, how):
+    other_data = [1, 2, 3, 4, 5][: len(str_data)]
+
+    pdf = pd.DataFrame()
+    gdf = cudf.DataFrame()
+    for i in range(num_keys):
+        pdf[i] = pd.Series(str_data, dtype="str")
+        gdf[i] = cudf.Series(str_data, dtype="str")
+    pdf["a"] = other_data
+    gdf["a"] = other_data
+
+    pdf2 = pdf.copy()
+    gdf2 = gdf.copy()
+
+    expect = pdf.merge(pdf2, on=list(range(num_keys)), how=how)
+    got = gdf.merge(gdf2, on=list(range(num_keys)), how=how)
+
+    if len(expect) == 0 and len(got) == 0:
+        expect = expect.reset_index(drop=True)
+        got = got[expect.columns]  # reorder columns
+
+    if how == "right":
+        got = got[expect.columns]  # reorder columns
+
+    assert_join_results_equal(expect, got, how=how)
+
+
+@pytest.mark.parametrize(
+    "str_data_nulls",
+    [
+        ["a", "b", "c"],
+        ["a", "b", "f", "g"],
+        ["f", "g", "h", "i", "j"],
+        ["f", "g", "h"],
+        [None, None, None, None, None],
+        [],
+    ],
+)
+def test_string_join_key_nulls(str_data_nulls):
+    str_data = ["a", "b", "c", "d", "e"]
+    other_data = [1, 2, 3, 4, 5]
+
+    other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)]
+
+    pdf = pd.DataFrame()
+    gdf = cudf.DataFrame()
+    pdf["key"] = pd.Series(str_data, dtype="str")
+    gdf["key"] = cudf.Series(str_data, dtype="str")
+    pdf["vals"] = other_data
+    gdf["vals"] = other_data
+
+    pdf2 = pd.DataFrame()
+    gdf2 = cudf.DataFrame()
+    pdf2["key"] = pd.Series(str_data_nulls, dtype="str")
+    gdf2["key"] = cudf.Series(str_data_nulls, dtype="str")
+    pdf2["vals"] = pd.Series(other_data_nulls, dtype="int64")
+    gdf2["vals"] = cudf.Series(other_data_nulls, dtype="int64")
+
+    expect = pdf.merge(pdf2, on="key", how="left")
+    got = gdf.merge(gdf2, on="key", how="left")
+    got["vals_y"] = got["vals_y"].fillna(-1)
+
+    if len(expect) == 0 and len(got) == 0:
+        expect = expect.reset_index(drop=True)
+        got = got[expect.columns]
+
+    expect["vals_y"] = expect["vals_y"].fillna(-1).astype("int64")
+
+    assert_join_results_equal(expect, got, how="left")
+
+
+@pytest.mark.parametrize(
+    "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]]
+)
+@pytest.mark.parametrize("num_cols", [1, 2, 3])
+@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"])
+def test_string_join_non_key(str_data, num_cols, how):
+    other_data = [1, 2, 3, 4, 5][: len(str_data)]
+
+    pdf = pd.DataFrame()
+    gdf = cudf.DataFrame()
+    for i in range(num_cols):
+        pdf[i] = pd.Series(str_data, dtype="str")
+        gdf[i] = cudf.Series(str_data, dtype="str")
+    pdf["a"] = other_data
+    gdf["a"] = other_data
+
+    pdf2 = pdf.copy()
+    gdf2 = gdf.copy()
+
+    expect = pdf.merge(pdf2, on=["a"], how=how)
+    got = gdf.merge(gdf2, on=["a"], how=how)
+
+    if len(expect) == 0 and len(got) == 0:
+        expect = expect.reset_index(drop=True)
+        got = got[expect.columns]
+
+    if how == "right":
+        got = got[expect.columns]  # reorder columns
+
+    assert_join_results_equal(expect, got, how=how)
+
+
+@pytest.mark.parametrize(
+    "str_data_nulls",
+    [
+        ["a", "b", "c"],
+        ["a", "b", "f", "g"],
+        ["f", "g", "h", "i", "j"],
+        ["f", "g", "h"],
+        [None, None, None, None, None],
+        [],
+    ],
+)
+def test_string_join_non_key_nulls(str_data_nulls):
+    str_data = ["a", "b", "c", "d", "e"]
+    other_data = [1, 2, 3, 4, 5]
+
+    other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)]
+
+    pdf = pd.DataFrame()
+    gdf = cudf.DataFrame()
+    pdf["vals"] = pd.Series(str_data, dtype="str")
+    gdf["vals"] = cudf.Series(str_data, dtype="str")
+    pdf["key"] = other_data
+    gdf["key"] = other_data
+
+    pdf2 = pd.DataFrame()
+    gdf2 = cudf.DataFrame()
+    pdf2["vals"] = pd.Series(str_data_nulls, dtype="str")
+    gdf2["vals"] = cudf.Series(str_data_nulls, dtype="str")
+    pdf2["key"] = pd.Series(other_data_nulls, dtype="int64")
+    gdf2["key"] = cudf.Series(other_data_nulls, dtype="int64")
+
+    expect = pdf.merge(pdf2, on="key", how="left")
+    got = gdf.merge(gdf2, on="key", how="left")
+
+    if len(expect) == 0 and len(got) == 0:
+        expect = expect.reset_index(drop=True)
+        got = got[expect.columns]
+
+    assert_join_results_equal(expect, got, how="left")
+
+
+def test_string_join_values_nulls():
+    left_dict = [
+        {"b": "MATCH 1", "a": 1.0},
+        {"b": "MATCH 1", "a": 1.0},
+        {"b": "LEFT NO MATCH 1", "a": -1.0},
+        {"b": "MATCH 2", "a": 2.0},
+        {"b": "MATCH 2", "a": 2.0},
+        {"b": "MATCH 1", "a": 1.0},
+        {"b": "MATCH 1", "a": 1.0},
+        {"b": "MATCH 2", "a": 2.0},
+        {"b": "MATCH 2", "a": 2.0},
+        {"b": "LEFT NO MATCH 2", "a": -2.0},
+        {"b": "MATCH 3", "a": 3.0},
+        {"b": "MATCH 3", "a": 3.0},
+    ]
+
+    right_dict = [
+        {"b": "RIGHT NO MATCH 1", "c": -1.0},
+        {"b": "MATCH 3", "c": 3.0},
+        {"b": "MATCH 2", "c": 2.0},
+        {"b": "RIGHT NO MATCH 2", "c": -2.0},
+        {"b": "RIGHT NO MATCH 3", "c": -3.0},
+        {"b": "MATCH 1", "c": 1.0},
+    ]
+
+    left_pdf = pd.DataFrame(left_dict)
+    right_pdf = pd.DataFrame(right_dict)
+
+    left_gdf = cudf.DataFrame.from_pandas(left_pdf)
+    right_gdf = cudf.DataFrame.from_pandas(right_pdf)
+
+    expect = left_pdf.merge(right_pdf, how="left", on="b")
+    got = left_gdf.merge(right_gdf, how="left", on="b")
+
+    expect = expect.sort_values(by=["a", "b", "c"]).reset_index(drop=True)
+    got = got.sort_values(by=["a", "b", "c"]).reset_index(drop=True)
+
+    assert_join_results_equal(expect, got, how="left")
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 2da2cea164f..0b138f446ae 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -12,7 +12,7 @@
 
 import cudf
 from cudf.core._compat import PANDAS_GE_110
-from cudf.tests.utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq
+from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq
 
 
 def make_numeric_dataframe(nrows, dtype):
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index 42541f1e8b1..a6a9ba97ef5 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -9,7 +9,7 @@
 import cudf
 from cudf import NA
 from cudf._lib.copying import get_element
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
     TIMEDELTA_TYPES,
diff --git a/python/cudf/cudf/tests/test_monotonic.py b/python/cudf/cudf/tests/test_monotonic.py
index b26887ad6ae..e9c828ec0f5 100644
--- a/python/cudf/cudf/tests/test_monotonic.py
+++ b/python/cudf/cudf/tests/test_monotonic.py
@@ -16,7 +16,7 @@
     RangeIndex,
     StringIndex,
 )
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 @pytest.mark.parametrize("testrange", [(10, 20, 1), (0, -10, -1), (5, 5, 1)])
diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py
index bd78612d6c7..c8e5a9f071b 100644
--- a/python/cudf/cudf/tests/test_multiindex.py
+++ b/python/cudf/cudf/tests/test_multiindex.py
@@ -15,7 +15,7 @@
 import cudf
 from cudf.core.column import as_column
 from cudf.core.index import as_index
-from cudf.tests.utils import assert_eq, assert_exceptions_equal, assert_neq
+from cudf.testing._utils import assert_eq, assert_exceptions_equal, assert_neq
 
 
 def test_multiindex_levels_codes_validation():
diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py
index 12b17447268..7a766a49a62 100644
--- a/python/cudf/cudf/tests/test_numerical.py
+++ b/python/cudf/cudf/tests/test_numerical.py
@@ -6,7 +6,7 @@
 
 import cudf
 from cudf.core._compat import PANDAS_GE_100
-from cudf.tests.utils import NUMERIC_TYPES, assert_eq
+from cudf.testing._utils import NUMERIC_TYPES, assert_eq
 from cudf.utils.dtypes import cudf_dtypes_to_pandas_dtypes
 
 
diff --git a/python/cudf/cudf/tests/test_numpy_interop.py b/python/cudf/cudf/tests/test_numpy_interop.py
index 521840f8a8a..e5efe2f027d 100644
--- a/python/cudf/cudf/tests/test_numpy_interop.py
+++ b/python/cudf/cudf/tests/test_numpy_interop.py
@@ -2,7 +2,7 @@
 import pytest
 
 from cudf.core import DataFrame, Series
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 def test_to_records_noindex():
diff --git a/python/cudf/cudf/tests/test_onehot.py b/python/cudf/cudf/tests/test_onehot.py
index 61195faa4d0..bbec4594e15 100644
--- a/python/cudf/cudf/tests/test_onehot.py
+++ b/python/cudf/cudf/tests/test_onehot.py
@@ -8,7 +8,7 @@
 
 import cudf
 from cudf.core import DataFrame, GenericIndex, Series
-from cudf.tests import utils
+from cudf.testing import _utils as utils
 
 
 def test_onehot_simple():
diff --git a/python/cudf/cudf/tests/test_ops.py b/python/cudf/cudf/tests/test_ops.py
index 8cdef19d9ba..ac3f784ecd4 100644
--- a/python/cudf/cudf/tests/test_ops.py
+++ b/python/cudf/cudf/tests/test_ops.py
@@ -5,7 +5,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq, gen_rand
+from cudf.testing._utils import assert_eq, gen_rand
 
 
 def test_sqrt_float():
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index bd8131d4673..213b7bf39d7 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -15,7 +15,11 @@
 import cudf
 from cudf.core.dtypes import Decimal64Dtype
 from cudf.io.orc import ORCWriter
-from cudf.tests.utils import assert_eq, gen_rand_series, supported_numpy_dtypes
+from cudf.testing._utils import (
+    assert_eq,
+    gen_rand_series,
+    supported_numpy_dtypes,
+)
 
 
 @pytest.fixture(scope="module")
diff --git a/python/cudf/cudf/tests/test_pandas_interop.py b/python/cudf/cudf/tests/test_pandas_interop.py
index 24c60f12a2f..a8a45fc3c28 100644
--- a/python/cudf/cudf/tests/test_pandas_interop.py
+++ b/python/cudf/cudf/tests/test_pandas_interop.py
@@ -5,7 +5,7 @@
 
 import cudf
 from cudf.core import DataFrame
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 def test_to_pandas():
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 54bf17e4c2b..2d0a4006f44 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -18,8 +18,8 @@
 
 import cudf
 from cudf.io.parquet import ParquetWriter, merge_parquet_filemetadata
-from cudf.tests import dataset_generator as dg
-from cudf.tests.utils import (
+from cudf.testing import dataset_generator as dg
+from cudf.testing._utils import (
     TIMEDELTA_TYPES,
     assert_eq,
     assert_exceptions_equal,
diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py
index ca819c7f59b..48a25fcfadb 100644
--- a/python/cudf/cudf/tests/test_pickling.py
+++ b/python/cudf/cudf/tests/test_pickling.py
@@ -8,7 +8,7 @@
 
 from cudf.core import DataFrame, GenericIndex, Series
 from cudf.core.buffer import Buffer
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 if sys.version_info < (3, 8):
     try:
diff --git a/python/cudf/cudf/tests/test_quantiles.py b/python/cudf/cudf/tests/test_quantiles.py
index 49a2603b9a3..4055485c49a 100644
--- a/python/cudf/cudf/tests/test_quantiles.py
+++ b/python/cudf/cudf/tests/test_quantiles.py
@@ -1,7 +1,7 @@
 import pandas as pd
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 def test_single_q():
diff --git a/python/cudf/cudf/tests/test_query.py b/python/cudf/cudf/tests/test_query.py
index b6915a63947..8dc5df2dd7c 100644
--- a/python/cudf/cudf/tests/test_query.py
+++ b/python/cudf/cudf/tests/test_query.py
@@ -12,7 +12,7 @@
 
 import cudf
 from cudf.core import DataFrame
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 from cudf.utils import queryutils
 
 _params_query_parser = []
diff --git a/python/cudf/cudf/tests/test_query_mask.py b/python/cudf/cudf/tests/test_query_mask.py
index 35479f8308c..ab1c085c6c0 100644
--- a/python/cudf/cudf/tests/test_query_mask.py
+++ b/python/cudf/cudf/tests/test_query_mask.py
@@ -3,7 +3,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 _data = [
     {"a": [0, 1.0, 2.0, None, np.nan, None, 3, 5]},
diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py
index c86b2c61aa5..3c98496def3 100644
--- a/python/cudf/cudf/tests/test_rank.py
+++ b/python/cudf/cudf/tests/test_rank.py
@@ -7,7 +7,7 @@
 import pytest
 
 from cudf.core import DataFrame
-from cudf.tests.utils import assert_eq, assert_exceptions_equal
+from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
 
 @pytest.fixture
diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py
index 0fa09bc5df7..7cbc56f943c 100644
--- a/python/cudf/cudf/tests/test_reductions.py
+++ b/python/cudf/cudf/tests/test_reductions.py
@@ -13,8 +13,8 @@
 import cudf
 from cudf.core import Series
 from cudf.core.dtypes import Decimal64Dtype
-from cudf.tests import utils
-from cudf.tests.utils import NUMERIC_TYPES, assert_eq, gen_rand
+from cudf.testing import _utils as utils
+from cudf.testing._utils import NUMERIC_TYPES, assert_eq, gen_rand
 
 params_dtype = NUMERIC_TYPES
 
diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py
index 6dca539b8d5..b59428779c1 100644
--- a/python/cudf/cudf/tests/test_replace.py
+++ b/python/cudf/cudf/tests/test_replace.py
@@ -1,15 +1,15 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 import re
+from decimal import Decimal
 
 import numpy as np
 import pandas as pd
 import pytest
-from decimal import Decimal
 
 import cudf
 from cudf.core.dtypes import Decimal64Dtype
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     INTEGER_TYPES,
     NUMERIC_TYPES,
     assert_eq,
diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py
index 093be41275a..4906349ecba 100644
--- a/python/cudf/cudf/tests/test_repr.py
+++ b/python/cudf/cudf/tests/test_repr.py
@@ -10,7 +10,7 @@
 
 import cudf
 from cudf.core._compat import PANDAS_GE_110
-from cudf.tests import utils
+from cudf.testing import _utils as utils
 from cudf.utils.dtypes import cudf_dtypes_to_pandas_dtypes
 
 repr_categories = utils.NUMERIC_TYPES + ["str", "category", "datetime64[ns]"]
diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py
index b030924779d..0c4313eb47c 100644
--- a/python/cudf/cudf/tests/test_reshape.py
+++ b/python/cudf/cudf/tests/test_reshape.py
@@ -9,7 +9,7 @@
 import cudf
 from cudf import melt as cudf_melt
 from cudf.core._compat import PANDAS_GE_120
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     ALL_TYPES,
     DATETIME_TYPES,
     NUMERIC_TYPES,
diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py
index fcc5591adda..07e7f43c992 100644
--- a/python/cudf/cudf/tests/test_rolling.py
+++ b/python/cudf/cudf/tests/test_rolling.py
@@ -8,7 +8,7 @@
 
 import cudf
 from cudf.core._compat import PANDAS_GE_110
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py
index 2eefcfef7d2..133597b8f19 100644
--- a/python/cudf/cudf/tests/test_s3.py
+++ b/python/cudf/cudf/tests/test_s3.py
@@ -14,7 +14,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 moto = pytest.importorskip("moto", minversion="1.3.14")
 boto3 = pytest.importorskip("boto3")
diff --git a/python/cudf/cudf/tests/test_scalar.py b/python/cudf/cudf/tests/test_scalar.py
index 01e6b52f526..605005f41fc 100644
--- a/python/cudf/cudf/tests/test_scalar.py
+++ b/python/cudf/cudf/tests/test_scalar.py
@@ -11,7 +11,7 @@
 import cudf
 from cudf import Scalar as pycudf_scalar
 from cudf._lib.copying import get_element
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     ALL_TYPES,
     DATETIME_TYPES,
     NUMERIC_TYPES,
diff --git a/python/cudf/cudf/tests/test_scan.py b/python/cudf/cudf/tests/test_scan.py
index f7e8c5a8563..0ef7b89a606 100644
--- a/python/cudf/cudf/tests/test_scan.py
+++ b/python/cudf/cudf/tests/test_scan.py
@@ -5,8 +5,13 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import INTEGER_TYPES, NUMERIC_TYPES, assert_eq, gen_rand
 from cudf.core.dtypes import Decimal64Dtype
+from cudf.testing._utils import (
+    INTEGER_TYPES,
+    NUMERIC_TYPES,
+    assert_eq,
+    gen_rand,
+)
 
 params_sizes = [0, 1, 2, 5]
 
diff --git a/python/cudf/cudf/tests/test_search.py b/python/cudf/cudf/tests/test_search.py
index 4c42e2cb50f..c16c6486cd4 100644
--- a/python/cudf/cudf/tests/test_search.py
+++ b/python/cudf/cudf/tests/test_search.py
@@ -5,7 +5,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq, gen_rand, random_bitmask
+from cudf.testing._utils import assert_eq, gen_rand, random_bitmask
 
 
 @pytest.mark.parametrize("side", ["left", "right"])
diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py
index 4be5adf84de..b436825cf69 100644
--- a/python/cudf/cudf/tests/test_serialize.py
+++ b/python/cudf/cudf/tests/test_serialize.py
@@ -8,8 +8,8 @@
 import pytest
 
 import cudf
-from cudf.tests import utils
-from cudf.tests.utils import assert_eq
+from cudf.testing import _utils as utils
+from cudf.testing._utils import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index d400a9ce8a9..f3da4275aea 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -9,7 +9,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
     TIMEDELTA_TYPES,
diff --git a/python/cudf/cudf/tests/test_seriesmap.py b/python/cudf/cudf/tests/test_seriesmap.py
index 324074b6021..d4ef3ba235d 100644
--- a/python/cudf/cudf/tests/test_seriesmap.py
+++ b/python/cudf/cudf/tests/test_seriesmap.py
@@ -4,12 +4,12 @@
 from math import floor
 
 import numpy as np
-import cudf
 import pandas as pd
 import pytest
 
+import cudf
 from cudf import Series
-from cudf.tests.utils import assert_eq, assert_exceptions_equal
+from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
 
 def test_series_map_basic():
diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py
index 28cb2568908..c7429f3c246 100644
--- a/python/cudf/cudf/tests/test_setitem.py
+++ b/python/cudf/cudf/tests/test_setitem.py
@@ -6,7 +6,7 @@
 
 import cudf
 from cudf.core._compat import PANDAS_GE_120, PANDAS_LE_122
-from cudf.tests.utils import assert_eq, assert_exceptions_equal
+from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
 
 @pytest.mark.parametrize("df", [pd.DataFrame({"a": [1, 2, 3]})])
diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py
index b90aebc33dc..95942045654 100644
--- a/python/cudf/cudf/tests/test_sorting.py
+++ b/python/cudf/cudf/tests/test_sorting.py
@@ -9,7 +9,7 @@
 
 from cudf.core import DataFrame, Series
 from cudf.core.column import NumericalColumn
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
     assert_eq,
diff --git a/python/cudf/cudf/tests/test_sparse_df.py b/python/cudf/cudf/tests/test_sparse_df.py
index 4551f48845f..50c8f3f41a8 100644
--- a/python/cudf/cudf/tests/test_sparse_df.py
+++ b/python/cudf/cudf/tests/test_sparse_df.py
@@ -8,7 +8,7 @@
 
 from cudf.comm.gpuarrow import GpuArrowReader
 from cudf.core import DataFrame, Series
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 def read_data():
diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py
index 4e07c974280..d4e944848c9 100644
--- a/python/cudf/cudf/tests/test_stats.py
+++ b/python/cudf/cudf/tests/test_stats.py
@@ -9,7 +9,7 @@
 
 import cudf
 from cudf.datasets import randomdata
-from cudf.tests.utils import assert_eq, assert_exceptions_equal
+from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
 params_dtypes = [np.int32, np.uint32, np.float32, np.float64]
 methods = ["min", "max", "sum", "mean", "var", "std"]
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index 58b3996ab5c..3c153a16a13 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -18,8 +18,7 @@
 from cudf.core._compat import PANDAS_GE_110
 from cudf.core.column.string import StringColumn
 from cudf.core.index import StringIndex, as_index
-from cudf.tests.test_joining import assert_join_results_equal
-from cudf.tests.utils import (
+from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
     assert_eq,
@@ -919,196 +918,6 @@ def test_string_split(data, pat, n, expand):
     assert_eq(expect, got)
 
 
-@pytest.mark.parametrize(
-    "str_data",
-    [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]],
-)
-@pytest.mark.parametrize("num_keys", [1, 2, 3])
-@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"])
-def test_string_join_key(str_data, num_keys, how):
-    other_data = [1, 2, 3, 4, 5][: len(str_data)]
-
-    pdf = pd.DataFrame()
-    gdf = cudf.DataFrame()
-    for i in range(num_keys):
-        pdf[i] = pd.Series(str_data, dtype="str")
-        gdf[i] = cudf.Series(str_data, dtype="str")
-    pdf["a"] = other_data
-    gdf["a"] = other_data
-
-    pdf2 = pdf.copy()
-    gdf2 = gdf.copy()
-
-    expect = pdf.merge(pdf2, on=list(range(num_keys)), how=how)
-    got = gdf.merge(gdf2, on=list(range(num_keys)), how=how)
-
-    if len(expect) == 0 and len(got) == 0:
-        expect = expect.reset_index(drop=True)
-        got = got[expect.columns]  # reorder columns
-
-    if how == "right":
-        got = got[expect.columns]  # reorder columns
-
-    assert_join_results_equal(expect, got, how=how)
-
-
-@pytest.mark.parametrize(
-    "str_data_nulls",
-    [
-        ["a", "b", "c"],
-        ["a", "b", "f", "g"],
-        ["f", "g", "h", "i", "j"],
-        ["f", "g", "h"],
-        [None, None, None, None, None],
-        [],
-    ],
-)
-def test_string_join_key_nulls(str_data_nulls):
-    str_data = ["a", "b", "c", "d", "e"]
-    other_data = [1, 2, 3, 4, 5]
-
-    other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)]
-
-    pdf = pd.DataFrame()
-    gdf = cudf.DataFrame()
-    pdf["key"] = pd.Series(str_data, dtype="str")
-    gdf["key"] = cudf.Series(str_data, dtype="str")
-    pdf["vals"] = other_data
-    gdf["vals"] = other_data
-
-    pdf2 = pd.DataFrame()
-    gdf2 = cudf.DataFrame()
-    pdf2["key"] = pd.Series(str_data_nulls, dtype="str")
-    gdf2["key"] = cudf.Series(str_data_nulls, dtype="str")
-    pdf2["vals"] = pd.Series(other_data_nulls, dtype="int64")
-    gdf2["vals"] = cudf.Series(other_data_nulls, dtype="int64")
-
-    expect = pdf.merge(pdf2, on="key", how="left")
-    got = gdf.merge(gdf2, on="key", how="left")
-    got["vals_y"] = got["vals_y"].fillna(-1)
-
-    if len(expect) == 0 and len(got) == 0:
-        expect = expect.reset_index(drop=True)
-        got = got[expect.columns]
-
-    expect["vals_y"] = expect["vals_y"].fillna(-1).astype("int64")
-
-    assert_join_results_equal(expect, got, how="left")
-
-
-@pytest.mark.parametrize(
-    "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]]
-)
-@pytest.mark.parametrize("num_cols", [1, 2, 3])
-@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"])
-def test_string_join_non_key(str_data, num_cols, how):
-    other_data = [1, 2, 3, 4, 5][: len(str_data)]
-
-    pdf = pd.DataFrame()
-    gdf = cudf.DataFrame()
-    for i in range(num_cols):
-        pdf[i] = pd.Series(str_data, dtype="str")
-        gdf[i] = cudf.Series(str_data, dtype="str")
-    pdf["a"] = other_data
-    gdf["a"] = other_data
-
-    pdf2 = pdf.copy()
-    gdf2 = gdf.copy()
-
-    expect = pdf.merge(pdf2, on=["a"], how=how)
-    got = gdf.merge(gdf2, on=["a"], how=how)
-
-    if len(expect) == 0 and len(got) == 0:
-        expect = expect.reset_index(drop=True)
-        got = got[expect.columns]
-
-    if how == "right":
-        got = got[expect.columns]  # reorder columns
-
-    assert_join_results_equal(expect, got, how=how)
-
-
-@pytest.mark.parametrize(
-    "str_data_nulls",
-    [
-        ["a", "b", "c"],
-        ["a", "b", "f", "g"],
-        ["f", "g", "h", "i", "j"],
-        ["f", "g", "h"],
-        [None, None, None, None, None],
-        [],
-    ],
-)
-def test_string_join_non_key_nulls(str_data_nulls):
-    str_data = ["a", "b", "c", "d", "e"]
-    other_data = [1, 2, 3, 4, 5]
-
-    other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)]
-
-    pdf = pd.DataFrame()
-    gdf = cudf.DataFrame()
-    pdf["vals"] = pd.Series(str_data, dtype="str")
-    gdf["vals"] = cudf.Series(str_data, dtype="str")
-    pdf["key"] = other_data
-    gdf["key"] = other_data
-
-    pdf2 = pd.DataFrame()
-    gdf2 = cudf.DataFrame()
-    pdf2["vals"] = pd.Series(str_data_nulls, dtype="str")
-    gdf2["vals"] = cudf.Series(str_data_nulls, dtype="str")
-    pdf2["key"] = pd.Series(other_data_nulls, dtype="int64")
-    gdf2["key"] = cudf.Series(other_data_nulls, dtype="int64")
-
-    expect = pdf.merge(pdf2, on="key", how="left")
-    got = gdf.merge(gdf2, on="key", how="left")
-
-    if len(expect) == 0 and len(got) == 0:
-        expect = expect.reset_index(drop=True)
-        got = got[expect.columns]
-
-    assert_join_results_equal(expect, got, how="left")
-
-
-def test_string_join_values_nulls():
-    left_dict = [
-        {"b": "MATCH 1", "a": 1.0},
-        {"b": "MATCH 1", "a": 1.0},
-        {"b": "LEFT NO MATCH 1", "a": -1.0},
-        {"b": "MATCH 2", "a": 2.0},
-        {"b": "MATCH 2", "a": 2.0},
-        {"b": "MATCH 1", "a": 1.0},
-        {"b": "MATCH 1", "a": 1.0},
-        {"b": "MATCH 2", "a": 2.0},
-        {"b": "MATCH 2", "a": 2.0},
-        {"b": "LEFT NO MATCH 2", "a": -2.0},
-        {"b": "MATCH 3", "a": 3.0},
-        {"b": "MATCH 3", "a": 3.0},
-    ]
-
-    right_dict = [
-        {"b": "RIGHT NO MATCH 1", "c": -1.0},
-        {"b": "MATCH 3", "c": 3.0},
-        {"b": "MATCH 2", "c": 2.0},
-        {"b": "RIGHT NO MATCH 2", "c": -2.0},
-        {"b": "RIGHT NO MATCH 3", "c": -3.0},
-        {"b": "MATCH 1", "c": 1.0},
-    ]
-
-    left_pdf = pd.DataFrame(left_dict)
-    right_pdf = pd.DataFrame(right_dict)
-
-    left_gdf = cudf.DataFrame.from_pandas(left_pdf)
-    right_gdf = cudf.DataFrame.from_pandas(right_pdf)
-
-    expect = left_pdf.merge(right_pdf, how="left", on="b")
-    got = left_gdf.merge(right_gdf, how="left", on="b")
-
-    expect = expect.sort_values(by=["a", "b", "c"]).reset_index(drop=True)
-    got = got.sort_values(by=["a", "b", "c"]).reset_index(drop=True)
-
-    assert_join_results_equal(expect, got, how="left")
-
-
 @pytest.mark.parametrize(
     "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]]
 )
diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py
index 938d99ff48a..98a14b2e80f 100644
--- a/python/cudf/cudf/tests/test_struct.py
+++ b/python/cudf/cudf/tests/test_struct.py
@@ -6,7 +6,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_testing.py b/python/cudf/cudf/tests/test_testing.py
index eee7078433d..b2e5ea70ddc 100644
--- a/python/cudf/cudf/tests/test_testing.py
+++ b/python/cudf/cudf/tests/test_testing.py
@@ -10,7 +10,7 @@
     assert_index_equal,
     assert_series_equal,
 )
-from cudf.tests.utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq
+from cudf.testing._utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq
 
 
 @pytest.mark.parametrize("rdata", [[1, 2, 5], [1, 2, 6], [1, 2, 5, 6]])
diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py
index 072fc23abba..6c3fdd4640a 100644
--- a/python/cudf/cudf/tests/test_text.py
+++ b/python/cudf/cudf/tests/test_text.py
@@ -5,7 +5,7 @@
 import pytest
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 def test_tokenize():
diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py
index 3efc30af01e..a65fdeeb0dd 100644
--- a/python/cudf/cudf/tests/test_timedelta.py
+++ b/python/cudf/cudf/tests/test_timedelta.py
@@ -11,8 +11,8 @@
 
 import cudf
 from cudf.core._compat import PANDAS_GE_120
-from cudf.tests import utils as utils
-from cudf.tests.utils import assert_eq, assert_exceptions_equal
+from cudf.testing import _utils as utils
+from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
 _TIMEDELTA_DATA = [
     [1000000, 200000, 3000000],
diff --git a/python/cudf/cudf/tests/test_transform.py b/python/cudf/cudf/tests/test_transform.py
index ed409de196e..582d5a43edf 100644
--- a/python/cudf/cudf/tests/test_transform.py
+++ b/python/cudf/cudf/tests/test_transform.py
@@ -6,7 +6,7 @@
 import pytest
 
 from cudf.core import Series
-from cudf.tests.utils import NUMERIC_TYPES
+from cudf.testing._utils import NUMERIC_TYPES
 
 supported_types = NUMERIC_TYPES
 
diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py
index f132271cfd8..2089f764724 100644
--- a/python/cudf/cudf/tests/test_unaops.py
+++ b/python/cudf/cudf/tests/test_unaops.py
@@ -10,7 +10,7 @@
 
 import cudf
 from cudf.core import Series
-from cudf.tests import utils
+from cudf.testing import _utils as utils
 
 _unaops = [operator.abs, operator.invert, operator.neg, np.ceil, np.floor]
 
diff --git a/python/custreamz/custreamz/tests/test_kafka.py b/python/custreamz/custreamz/tests/test_kafka.py
index 059655d4ca0..d29ebf8db8b 100644
--- a/python/custreamz/custreamz/tests/test_kafka.py
+++ b/python/custreamz/custreamz/tests/test_kafka.py
@@ -2,7 +2,7 @@
 import confluent_kafka as ck
 import pytest
 
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 
 @pytest.mark.parametrize("commit_offset", [-1, 0, 1, 1000])
diff --git a/python/dask_cudf/dask_cudf/tests/test_accessor.py b/python/dask_cudf/dask_cudf/tests/test_accessor.py
index 48e0d022a52..94e0169bdf9 100644
--- a/python/dask_cudf/dask_cudf/tests/test_accessor.py
+++ b/python/dask_cudf/dask_cudf/tests/test_accessor.py
@@ -5,11 +5,11 @@
 
 from dask import dataframe as dd
 
-from cudf import DataFrame, Series
-from cudf.tests.utils import assert_eq, does_not_raise
-
 import dask_cudf as dgd
 
+from cudf import DataFrame, Series
+from cudf.testing._utils import assert_eq, does_not_raise
+
 #############################################################################
 #                        Datetime Accessor                                  #
 #############################################################################
diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py
index 2f73534b45a..cf5203a22e5 100644
--- a/python/dask_cudf/dask_cudf/tests/test_core.py
+++ b/python/dask_cudf/dask_cudf/tests/test_core.py
@@ -706,7 +706,7 @@ def test_dataframe_set_index():
 
     pddf = dd.from_pandas(pdf, npartitions=4)
     pddf = pddf.set_index("str")
-    from cudf.tests.utils import assert_eq
+    from cudf.testing._utils import assert_eq
 
     assert_eq(ddf.compute(), pddf.compute())
 
diff --git a/python/dask_cudf/dask_cudf/tests/test_distributed.py b/python/dask_cudf/dask_cudf/tests/test_distributed.py
index 85354704902..876a66f78d7 100644
--- a/python/dask_cudf/dask_cudf/tests/test_distributed.py
+++ b/python/dask_cudf/dask_cudf/tests/test_distributed.py
@@ -7,7 +7,7 @@
 from distributed.utils_test import loop  # noqa: F401
 
 import cudf
-from cudf.tests.utils import assert_eq
+from cudf.testing._utils import assert_eq
 
 import dask_cudf