From e038498c70207df1ac64b1aa276a5fd5e3cd306b Mon Sep 17 00:00:00 2001 From: James Duong Date: Tue, 26 Sep 2023 13:52:07 -0700 Subject: [PATCH] GH-25659: [Java] Add DefaultVectorComparators for Large types (#37887) ### Rationale for this change Support additional vector types in DefaultVectorComparators to make arrow-algorithm easier to use. ### What changes are included in this PR? Add DefaultVectorComparators for large vector types (LargeVarCharVector and LargeVarBinaryVector). ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #25659 Authored-by: James Duong Signed-off-by: David Li --- .../sort/DefaultVectorComparators.java | 16 ++++++------ .../sort/TestDefaultVectorComparator.java | 26 +++++++++++++++++++ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java index 99d66f94261ee..4f9c8b7d71bab 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java @@ -25,7 +25,6 @@ import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.DateDayVector; @@ -50,6 +49,7 @@ import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.UInt8Vector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VariableWidthVector; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; /** @@ -112,7 +112,7 @@ public static VectorValueComparator createDefaultComp } else if (vector instanceof 
TimeStampVector) { return (VectorValueComparator) new TimeStampComparator(); } - } else if (vector instanceof BaseVariableWidthVector) { + } else if (vector instanceof VariableWidthVector) { return (VectorValueComparator) new VariableWidthComparator(); } else if (vector instanceof BaseRepeatedValueVector) { VectorValueComparator innerComparator = @@ -675,14 +675,14 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link org.apache.arrow.vector.BaseVariableWidthVector}. + * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. * The comparison is in lexicographic order, with null comes first. */ - public static class VariableWidthComparator extends VectorValueComparator<BaseVariableWidthVector> { + public static class VariableWidthComparator extends VectorValueComparator<VariableWidthVector> { - private ArrowBufPointer reusablePointer1 = new ArrowBufPointer(); + private final ArrowBufPointer reusablePointer1 = new ArrowBufPointer(); - private ArrowBufPointer reusablePointer2 = new ArrowBufPointer(); + private final ArrowBufPointer reusablePointer2 = new ArrowBufPointer(); @Override public int compare(int index1, int index2) { @@ -699,7 +699,7 @@ public int compareNotNull(int index1, int index2) { } @Override - public VectorValueComparator<BaseVariableWidthVector> createNew() { + public VectorValueComparator<VariableWidthVector> createNew() { return new VariableWidthComparator(); } } @@ -743,7 +743,7 @@ public int compareNotNull(int index1, int index2) { @Override public VectorValueComparator createNew() { VectorValueComparator newInnerComparator = innerComparator.createNew(); - return new RepeatedValueComparator(newInnerComparator); + return new RepeatedValueComparator<>(newInnerComparator); } @Override diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java index 62051197740d8..bdae85110aa62 100644 ---
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java @@ -35,6 +35,8 @@ import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.IntervalDayVector; +import org.apache.arrow.vector.LargeVarBinaryVector; +import org.apache.arrow.vector.LargeVarCharVector; import org.apache.arrow.vector.SmallIntVector; import org.apache.arrow.vector.TimeMicroVector; import org.apache.arrow.vector.TimeMilliVector; @@ -47,6 +49,9 @@ import org.apache.arrow.vector.UInt2Vector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.UInt8Vector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; import org.apache.arrow.vector.types.TimeUnit; @@ -911,4 +916,25 @@ public void testCheckNullsOnCompareIsTrueWithEmptyVectors() { assertTrue(comparator.checkNullsOnCompare()); } } + + @Test + public void testVariableWidthDefaultComparators() { + try (VarCharVector vec = new VarCharVector("test", allocator)) { + verifyVariableWidthComparatorReturned(vec); + } + try (VarBinaryVector vec = new VarBinaryVector("test", allocator)) { + verifyVariableWidthComparatorReturned(vec); + } + try (LargeVarCharVector vec = new LargeVarCharVector("test", allocator)) { + verifyVariableWidthComparatorReturned(vec); + } + try (LargeVarBinaryVector vec = new LargeVarBinaryVector("test", allocator)) { + verifyVariableWidthComparatorReturned(vec); + } + } + + private static <V extends ValueVector> void verifyVariableWidthComparatorReturned(V vec) { + VectorValueComparator<V> comparator = DefaultVectorComparators.createDefaultComparator(vec); + assertEquals(DefaultVectorComparators.VariableWidthComparator.class, 
comparator.getClass()); + } }