Skip to content

Commit

Permalink
Assign partition index to geometries outside of KdbTree
Browse files Browse the repository at this point in the history
Currently, if a geometry is outside the bounding box of a KdbTree, it is
dropped: it's assigned an empty partition index array, which is
unnested, resulting in the row being dropped.  This can be an efficiency
measure: if one side of the join is much smaller than the other, then
the bounds will drop many rows before they are sent to the join worker.

However, if the bounds are less than both the build- and probe-side of
the join, then rows that would have matched in a non-partitioned join
will be dropped when you partition the join.  This makes the correctness
of the partitioned join dependent on the partition chosen, which can
lead to some surprising output changes that could be reasonably viewed
as data loss.

This commit changes the bounding box of the KdbTree to extend from
-Infinity to +Infinity, so that all (non-empty) geometries will get
at least one partition.
  • Loading branch information
jagill authored and arhimondr committed Oct 7, 2020
1 parent 89c3996 commit 4cd357c
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 127 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@
/**
* 2-dimensional K-D-B Tree
* see https://en.wikipedia.org/wiki/K-D-B-tree
*
* This KdbTree is open: it covers the entire plane, and the border nodes extend
* out to infinity. For each node, the right (xMax) and top (yMax) are not
* considered part of the node. This means, for intersection/containment checks,
* a strict inequality `<` must be used for xMax and yMax.
*/
public class KdbTree
{
Expand Down Expand Up @@ -277,12 +282,11 @@ public int next()
}
}

public static KdbTree buildKdbTree(int maxItemsPerNode, Rectangle extent, List<Rectangle> items)
public static KdbTree buildKdbTree(int maxItemsPerNode, List<Rectangle> items)
{
checkArgument(maxItemsPerNode > 0, "maxItemsPerNode must be > 0");
requireNonNull(extent, "extent is null");
requireNonNull(items, "items is null");
return new KdbTree(buildKdbTreeNode(maxItemsPerNode, 0, extent, items, new LeafIdAllocator()));
return new KdbTree(buildKdbTreeNode(maxItemsPerNode, 0, Rectangle.getUniverseRectangle(), items, new LeafIdAllocator()));
}

private static Node buildKdbTreeNode(int maxItemsPerNode, int level, Rectangle extent, List<Rectangle> items, LeafIdAllocator leafIdAllocator)
Expand All @@ -298,7 +302,7 @@ private static Node buildKdbTreeNode(int maxItemsPerNode, int level, Rectangle e
}

// Split over longer side
boolean splitVertically = extent.getWidth() >= extent.getHeight();
boolean splitVertically = shouldSplitVertically(extent);
Optional<SplitResult<Node>> splitResult = trySplit(splitVertically ? BY_X : BY_Y, maxItemsPerNode, level, extent, items, leafIdAllocator);
if (!splitResult.isPresent()) {
// Try spitting by the other side
Expand All @@ -312,6 +316,37 @@ private static Node buildKdbTreeNode(int maxItemsPerNode, int level, Rectangle e
return newInternal(extent, splitResult.get().getLeft(), splitResult.get().getRight());
}

/*
* Split vertically if the extent is wider than it is tall. Consider a
* doubly-infinite "longer" than half-infinite, and half-infinite longer
* than finite.
*/
private static boolean shouldSplitVertically(Rectangle extent)
{
int numHorizontalInfinities = 0;
if (extent.getXMax() == Double.POSITIVE_INFINITY) {
numHorizontalInfinities += 1;
}
if (extent.getXMin() == Double.NEGATIVE_INFINITY) {
numHorizontalInfinities += 1;
}

int numVerticalInfinities = 0;
if (extent.getYMax() == Double.POSITIVE_INFINITY) {
numVerticalInfinities += 1;
}
if (extent.getYMin() == Double.NEGATIVE_INFINITY) {
numVerticalInfinities += 1;
}

if (numHorizontalInfinities == numVerticalInfinities) {
return extent.getWidth() >= extent.getHeight();
}
else {
return numHorizontalInfinities > numVerticalInfinities;
}
}

private static final class SplitResult<T>
{
private final T left;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ public final class Rectangle
implements HasExtent
{
private static final int INSTANCE_SIZE = ClassLayout.parseClass(Rectangle.class).instanceSize();
private static final Rectangle UNIVERSE_RECTANGLE = new Rectangle(
Double.NEGATIVE_INFINITY,
Double.NEGATIVE_INFINITY,
Double.POSITIVE_INFINITY,
Double.POSITIVE_INFINITY);

private final double xMin;
private final double yMin;
Expand All @@ -51,6 +56,11 @@ public Rectangle(
this.yMax = yMax;
}

public static Rectangle getUniverseRectangle()
{
return UNIVERSE_RECTANGLE;
}

@JsonProperty
public double getXMin()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,25 @@
import java.util.Set;

import static com.facebook.presto.geospatial.KdbTree.buildKdbTree;
import static java.lang.Double.NEGATIVE_INFINITY;
import static java.lang.Double.POSITIVE_INFINITY;
import static org.testng.Assert.assertEquals;

public class TestKdbTree
{
@Test
public void testSerde()
{
Rectangle extent = new Rectangle(0, 0, 9, 4);
ImmutableList.Builder<Rectangle> rectangles = ImmutableList.builder();
for (double x = 0; x < 10; x += 1) {
for (double y = 0; y < 5; y += 1) {
rectangles.add(new Rectangle(x, y, x + 0.1, y + 0.2));
}
}

testSerializationRoundtrip(buildKdbTree(100, extent, rectangles.build()));
testSerializationRoundtrip(buildKdbTree(20, extent, rectangles.build()));
testSerializationRoundtrip(buildKdbTree(10, extent, rectangles.build()));
testSerializationRoundtrip(buildKdbTree(100, rectangles.build()));
testSerializationRoundtrip(buildKdbTree(20, rectangles.build()));
testSerializationRoundtrip(buildKdbTree(10, rectangles.build()));
}

private void testSerializationRoundtrip(KdbTree tree)
Expand All @@ -57,21 +58,20 @@ public void testSinglePartition()

private void testSinglePartition(double width, double height)
{
Rectangle extent = new Rectangle(0, 0, 9, 4);
ImmutableList.Builder<Rectangle> rectangles = ImmutableList.builder();
for (double x = 0; x < 10; x += 1) {
for (double y = 0; y < 5; y += 1) {
rectangles.add(new Rectangle(x, y, x + width, y + height));
}
}

KdbTree tree = buildKdbTree(100, extent, rectangles.build());
KdbTree tree = buildKdbTree(100, rectangles.build());

assertEquals(tree.getLeaves().size(), 1);

Map.Entry<Integer, Rectangle> entry = Iterables.getOnlyElement(tree.getLeaves().entrySet());
assertEquals(entry.getKey().intValue(), 0);
assertEquals(entry.getValue(), extent);
assertEquals(entry.getValue(), Rectangle.getUniverseRectangle());
}

@Test
Expand All @@ -83,24 +83,29 @@ public void testSplitVertically()

private void testSplitVertically(double width, double height)
{
Rectangle extent = new Rectangle(0, 0, 9, 4);
ImmutableList.Builder<Rectangle> rectangles = ImmutableList.builder();
for (int x = 0; x < 10; x++) {
for (int y = 0; y < 5; y++) {
rectangles.add(new Rectangle(x, y, x + width, y + height));
}
}

KdbTree treeCopy = buildKdbTree(25, extent, rectangles.build());
KdbTree tree = buildKdbTree(25, rectangles.build());

Map<Integer, Rectangle> leafNodes = treeCopy.getLeaves();
Map<Integer, Rectangle> leafNodes = tree.getLeaves();
assertEquals(leafNodes.size(), 2);
assertEquals(leafNodes.keySet(), ImmutableSet.of(0, 1));
assertEquals(leafNodes.get(0), new Rectangle(0, 0, 4.5, 4));
assertEquals(leafNodes.get(1), new Rectangle(4.5, 0, 9, 4));
assertEquals(leafNodes.get(0), new Rectangle(NEGATIVE_INFINITY, NEGATIVE_INFINITY, 4.5, POSITIVE_INFINITY));
assertEquals(leafNodes.get(1), new Rectangle(4.5, NEGATIVE_INFINITY, POSITIVE_INFINITY, POSITIVE_INFINITY));

assertPartitions(treeCopy, new Rectangle(1, 1, 2, 2), ImmutableSet.of(0));
assertPartitions(treeCopy, new Rectangle(1, 1, 5, 2), ImmutableSet.of(0, 1));
assertPartitions(tree, new Rectangle(1, 1, 2, 2), ImmutableSet.of(0));
assertPartitions(tree, new Rectangle(1, 1, 5, 2), ImmutableSet.of(0, 1));
assertPartitions(tree, new Rectangle(5, 1, 5, 2), ImmutableSet.of(1));
// Partitions do not contain right or top border, but contain left and bottom borders
assertPartitions(tree, new Rectangle(4.5, 0, 4.5, 0), ImmutableSet.of(1));
// "Outside" partitions
assertPartitions(tree, new Rectangle(-6, 2, -6, 2), ImmutableSet.of(0));
assertPartitions(tree, new Rectangle(20, 2, 20, 2), ImmutableSet.of(1));
}

@Test
Expand All @@ -112,35 +117,38 @@ public void testSplitHorizontally()

private void testSplitHorizontally(double width, double height)
{
Rectangle extent = new Rectangle(0, 0, 4, 9);
ImmutableList.Builder<Rectangle> rectangles = ImmutableList.builder();
for (int x = 0; x < 5; x++) {
for (int y = 0; y < 10; y++) {
rectangles.add(new Rectangle(x, y, x + width, y + height));
}
}

KdbTree tree = buildKdbTree(25, extent, rectangles.build());
KdbTree tree = buildKdbTree(25, rectangles.build());

Map<Integer, Rectangle> leafNodes = tree.getLeaves();
assertEquals(leafNodes.size(), 2);
assertEquals(leafNodes.keySet(), ImmutableSet.of(0, 1));
assertEquals(leafNodes.get(0), new Rectangle(0, 0, 4, 4.5));
assertEquals(leafNodes.get(1), new Rectangle(0, 4.5, 4, 9));
assertEquals(leafNodes.size(), 3);
assertEquals(leafNodes.keySet(), ImmutableSet.of(0, 1, 2));
assertEquals(leafNodes.get(0), new Rectangle(NEGATIVE_INFINITY, NEGATIVE_INFINITY, 2.5, 4.5));
assertEquals(leafNodes.get(1), new Rectangle(NEGATIVE_INFINITY, 4.5, 2.5, POSITIVE_INFINITY));
assertEquals(leafNodes.get(2), new Rectangle(2.5, NEGATIVE_INFINITY, POSITIVE_INFINITY, POSITIVE_INFINITY));

// points inside and outside partitions
assertPartitions(tree, new Rectangle(1, 1, 1, 1), ImmutableSet.of(0));
assertPartitions(tree, new Rectangle(1, 6, 1, 6), ImmutableSet.of(1));
assertPartitions(tree, new Rectangle(5, 1, 5, 1), ImmutableSet.of());
assertPartitions(tree, new Rectangle(5, 1, 5, 1), ImmutableSet.of(2));

// point on the border separating two partitions
assertPartitions(tree, new Rectangle(1, 4.5, 1, 4.5), ImmutableSet.of(1));
assertPartitions(tree, new Rectangle(2.5, 4.5, 2.5, 4.5), ImmutableSet.of(2));
assertPartitions(tree, new Rectangle(2.5, 1, 2.5, 1), ImmutableSet.of(2));
assertPartitions(tree, new Rectangle(2.5, 5, 2.5, 5), ImmutableSet.of(2));

// rectangles
assertPartitions(tree, new Rectangle(1, 1, 2, 2), ImmutableSet.of(0));
assertPartitions(tree, new Rectangle(1, 6, 2, 7), ImmutableSet.of(1));
assertPartitions(tree, new Rectangle(1, 1, 2, 5), ImmutableSet.of(0, 1));
assertPartitions(tree, new Rectangle(5, 1, 6, 2), ImmutableSet.of());
assertPartitions(tree, new Rectangle(5, 1, 6, 2), ImmutableSet.of(2));
}

private void assertPartitions(KdbTree kdbTree, Rectangle envelope, Set<Integer> partitions)
Expand All @@ -159,25 +167,24 @@ public void testEvenDistribution()

private void testEvenDistribution(double width, double height)
{
Rectangle extent = new Rectangle(0, 0, 9, 4);
ImmutableList.Builder<Rectangle> rectangles = ImmutableList.builder();
for (int x = 0; x < 10; x++) {
for (int y = 0; y < 5; y++) {
rectangles.add(new Rectangle(x, y, x + width, y + height));
}
}

KdbTree tree = buildKdbTree(10, extent, rectangles.build());
KdbTree tree = buildKdbTree(10, rectangles.build());

Map<Integer, Rectangle> leafNodes = tree.getLeaves();
assertEquals(leafNodes.size(), 6);
assertEquals(leafNodes.keySet(), ImmutableSet.of(0, 1, 2, 3, 4, 5));
assertEquals(leafNodes.get(0), new Rectangle(0, 0, 2.5, 2.5));
assertEquals(leafNodes.get(1), new Rectangle(0, 2.5, 2.5, 4));
assertEquals(leafNodes.get(2), new Rectangle(2.5, 0, 4.5, 4));
assertEquals(leafNodes.get(3), new Rectangle(4.5, 0, 7.5, 2.5));
assertEquals(leafNodes.get(4), new Rectangle(4.5, 2.5, 7.5, 4));
assertEquals(leafNodes.get(5), new Rectangle(7.5, 0, 9, 4));
assertEquals(leafNodes.get(0), new Rectangle(NEGATIVE_INFINITY, NEGATIVE_INFINITY, 2.5, 2.5));
assertEquals(leafNodes.get(1), new Rectangle(2.5, NEGATIVE_INFINITY, 4.5, 2.5));
assertEquals(leafNodes.get(2), new Rectangle(NEGATIVE_INFINITY, 2.5, 4.5, POSITIVE_INFINITY));
assertEquals(leafNodes.get(3), new Rectangle(4.5, NEGATIVE_INFINITY, 7.5, 2.5));
assertEquals(leafNodes.get(4), new Rectangle(7.5, NEGATIVE_INFINITY, POSITIVE_INFINITY, 2.5));
assertEquals(leafNodes.get(5), new Rectangle(4.5, 2.5, POSITIVE_INFINITY, POSITIVE_INFINITY));
}

@Test
Expand All @@ -189,7 +196,6 @@ public void testSkewedDistribution()

private void testSkewedDistribution(double width, double height)
{
Rectangle extent = new Rectangle(0, 0, 9, 4);
ImmutableList.Builder<Rectangle> rectangles = ImmutableList.builder();
for (int x = 0; x < 10; x++) {
for (int y = 0; y < 5; y++) {
Expand All @@ -203,20 +209,20 @@ private void testSkewedDistribution(double width, double height)
}
}

KdbTree tree = buildKdbTree(10, extent, rectangles.build());
KdbTree tree = buildKdbTree(10, rectangles.build());

Map<Integer, Rectangle> leafNodes = tree.getLeaves();
assertEquals(leafNodes.size(), 9);
assertEquals(leafNodes.keySet(), ImmutableSet.of(0, 1, 2, 3, 4, 5, 6, 7, 8));
assertEquals(leafNodes.get(0), new Rectangle(0, 0, 1.5, 2.5));
assertEquals(leafNodes.get(1), new Rectangle(1.5, 0, 3.5, 2.5));
assertEquals(leafNodes.get(2), new Rectangle(0, 2.5, 3.5, 4));
assertEquals(leafNodes.get(3), new Rectangle(3.5, 0, 5.1, 1.75));
assertEquals(leafNodes.get(4), new Rectangle(3.5, 1.75, 5.1, 4));
assertEquals(leafNodes.get(5), new Rectangle(5.1, 0, 5.9, 1.75));
assertEquals(leafNodes.get(6), new Rectangle(5.9, 0, 9, 1.75));
assertEquals(leafNodes.get(7), new Rectangle(5.1, 1.75, 7.5, 4));
assertEquals(leafNodes.get(8), new Rectangle(7.5, 1.75, 9, 4));
assertEquals(leafNodes.get(0), new Rectangle(NEGATIVE_INFINITY, NEGATIVE_INFINITY, 1.5, 2.5));
assertEquals(leafNodes.get(1), new Rectangle(1.5, NEGATIVE_INFINITY, 3.5, 2.5));
assertEquals(leafNodes.get(2), new Rectangle(3.5, NEGATIVE_INFINITY, 5.1, 2.5));
assertEquals(leafNodes.get(3), new Rectangle(NEGATIVE_INFINITY, 2.5, 2.5, POSITIVE_INFINITY));
assertEquals(leafNodes.get(4), new Rectangle(2.5, 2.5, 5.1, POSITIVE_INFINITY));
assertEquals(leafNodes.get(5), new Rectangle(5.1, NEGATIVE_INFINITY, 5.9, 1.75));
assertEquals(leafNodes.get(6), new Rectangle(5.9, NEGATIVE_INFINITY, POSITIVE_INFINITY, 1.75));
assertEquals(leafNodes.get(7), new Rectangle(5.1, 1.75, 7.5, POSITIVE_INFINITY));
assertEquals(leafNodes.get(8), new Rectangle(7.5, 1.75, POSITIVE_INFINITY, POSITIVE_INFINITY));
}

@Test
Expand All @@ -228,7 +234,6 @@ public void testCantSplitVertically()

private void testCantSplitVertically(double width, double height)
{
Rectangle extent = new Rectangle(0, 0, 9 + width, 4 + height);
ImmutableList.Builder<Rectangle> rectangles = ImmutableList.builder();
for (int y = 0; y < 5; y++) {
for (int i = 0; i < 10; i++) {
Expand All @@ -237,21 +242,21 @@ private void testCantSplitVertically(double width, double height)
}
}

KdbTree tree = buildKdbTree(10, extent, rectangles.build());
KdbTree tree = buildKdbTree(10, rectangles.build());

Map<Integer, Rectangle> leafNodes = tree.getLeaves();
assertEquals(leafNodes.size(), 10);
assertEquals(leafNodes.keySet(), ImmutableSet.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
assertEquals(leafNodes.get(0), new Rectangle(0, 0, 4.5, 0.5));
assertEquals(leafNodes.get(1), new Rectangle(0, 0.5, 4.5, 1.5));
assertEquals(leafNodes.get(2), new Rectangle(0, 1.5, 4.5, 2.5));
assertEquals(leafNodes.get(3), new Rectangle(0, 2.5, 4.5, 3.5));
assertEquals(leafNodes.get(4), new Rectangle(0, 3.5, 4.5, 4 + height));
assertEquals(leafNodes.get(5), new Rectangle(4.5, 0, 9 + width, 0.5));
assertEquals(leafNodes.get(6), new Rectangle(4.5, 0.5, 9 + width, 1.5));
assertEquals(leafNodes.get(7), new Rectangle(4.5, 1.5, 9 + width, 2.5));
assertEquals(leafNodes.get(8), new Rectangle(4.5, 2.5, 9 + width, 3.5));
assertEquals(leafNodes.get(9), new Rectangle(4.5, 3.5, 9 + width, 4 + height));
assertEquals(leafNodes.get(0), new Rectangle(NEGATIVE_INFINITY, NEGATIVE_INFINITY, 4.5, 0.5));
assertEquals(leafNodes.get(1), new Rectangle(NEGATIVE_INFINITY, 0.5, 4.5, 1.5));
assertEquals(leafNodes.get(2), new Rectangle(NEGATIVE_INFINITY, 1.5, 4.5, 2.5));
assertEquals(leafNodes.get(3), new Rectangle(NEGATIVE_INFINITY, 2.5, 4.5, 3.5));
assertEquals(leafNodes.get(4), new Rectangle(NEGATIVE_INFINITY, 3.5, 4.5, POSITIVE_INFINITY));
assertEquals(leafNodes.get(5), new Rectangle(4.5, NEGATIVE_INFINITY, POSITIVE_INFINITY, 0.5));
assertEquals(leafNodes.get(6), new Rectangle(4.5, 0.5, POSITIVE_INFINITY, 1.5));
assertEquals(leafNodes.get(7), new Rectangle(4.5, 1.5, POSITIVE_INFINITY, 2.5));
assertEquals(leafNodes.get(8), new Rectangle(4.5, 2.5, POSITIVE_INFINITY, 3.5));
assertEquals(leafNodes.get(9), new Rectangle(4.5, 3.5, POSITIVE_INFINITY, POSITIVE_INFINITY));
}

@Test
Expand All @@ -263,7 +268,6 @@ public void testCantSplit()

private void testCantSplit(double width, double height)
{
Rectangle extent = new Rectangle(0, 0, 9 + width, 4 + height);
ImmutableList.Builder<Rectangle> rectangles = ImmutableList.builder();
for (int i = 0; i < 10; i++) {
for (int j = 0; j < 5; j++) {
Expand All @@ -272,12 +276,12 @@ private void testCantSplit(double width, double height)
}
}

KdbTree tree = buildKdbTree(10, extent, rectangles.build());
KdbTree tree = buildKdbTree(10, rectangles.build());

Map<Integer, Rectangle> leafNodes = tree.getLeaves();
assertEquals(leafNodes.size(), 2);
assertEquals(leafNodes.keySet(), ImmutableSet.of(0, 1));
assertEquals(leafNodes.get(0), new Rectangle(0, 0, 4.5, 4 + height));
assertEquals(leafNodes.get(1), new Rectangle(4.5, 0, 9 + width, 4 + height));
assertEquals(leafNodes.get(0), new Rectangle(NEGATIVE_INFINITY, NEGATIVE_INFINITY, 4.5, POSITIVE_INFINITY));
assertEquals(leafNodes.get(1), new Rectangle(4.5, NEGATIVE_INFINITY, POSITIVE_INFINITY, POSITIVE_INFINITY));
}
}
Loading

0 comments on commit 4cd357c

Please sign in to comment.