Skip to content

Commit

Permalink
fixed a performance issue with querying geohash by geometry (#1884)
Browse files Browse the repository at this point in the history
  • Loading branch information
rfecher committed Apr 25, 2022
1 parent 83b078a commit 0aa2626
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 26 deletions.
Expand Up @@ -9,12 +9,16 @@
package org.locationtech.geowave.core.geotime.binning;

import java.util.HashSet;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
import org.locationtech.geowave.core.geotime.util.GeometryUtils;
import org.locationtech.geowave.core.geotime.util.GeometryUtils.GeometryHandler;
import org.locationtech.geowave.core.index.ByteArray;
import org.locationtech.geowave.core.index.ByteArrayRange;
import org.locationtech.geowave.core.index.StringUtils;
import org.locationtech.geowave.core.store.api.BinConstraints.ByteArrayConstraints;
import org.locationtech.geowave.core.store.statistics.query.BinConstraintsImpl.ExplicitConstraints;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.LineString;
Expand All @@ -23,12 +27,51 @@
import com.github.davidmoten.geo.Coverage;
import com.github.davidmoten.geo.GeoHash;
import com.github.davidmoten.geo.LatLong;
import com.google.common.collect.HashMultimap;

class GeohashBinningHelper implements SpatialBinningHelper {
public GeohashBinningHelper() {
super();
}

@Override
public ByteArrayConstraints getGeometryConstraints(final Geometry geometry, final int precision) {
final GeohashGeometryHandler geometryHandler = new GeohashGeometryHandler(precision);
GeometryUtils.visitGeometry(geometry, geometryHandler);
// we try to replace all common prefixes with a prefix scan instead of using every individual
// hash on the query
// this can really help with query performance
if (removePrefixes(geometryHandler.hashes)) {
return new ExplicitConstraints(
geometryHandler.hashes.stream().map(str -> StringUtils.stringToBinary(str)).map(
bytes -> new ByteArrayRange(bytes, bytes)).toArray(ByteArrayRange[]::new));
}
return new ExplicitConstraints(
geometryHandler.hashes.stream().map(ByteArray::new).toArray(ByteArray[]::new));
}

private static boolean removePrefixes(final Set<String> allHashes) {
if (allHashes.isEmpty() || allHashes.iterator().next().isEmpty()) {
return false;
}
final HashMultimap<String, String> prefixMap = HashMultimap.create();
allHashes.forEach(s -> prefixMap.put(s.substring(0, s.length() - 1), s));
// if there are 32 entries of the same substring that means its prefix is fully covered and we
// can remove the 32 and replace with the prefix

// need to make sure the set is mutable because we will also try to find prefixes in this set
final Set<String> retVal =
prefixMap.asMap().entrySet().stream().filter(e -> e.getValue().size() == 32).map(
Entry::getKey).collect(Collectors.toCollection(HashSet::new));
if (retVal.isEmpty()) {
return false;
}
retVal.forEach(k -> prefixMap.get(k).forEach(v -> allHashes.remove(v)));
removePrefixes(retVal);
allHashes.addAll(retVal);
return true;
}

@Override
public ByteArray[] getSpatialBins(final Geometry geometry, final int precision) {
final GeohashGeometryHandler geometryHandler = new GeohashGeometryHandler(precision);
Expand Down
Expand Up @@ -357,22 +357,7 @@ public static byte[] getNextPrefix(final byte[] rowKeyPrefix) {
}

if (offset == 0) {
// TODO: is this correct? an empty byte array sorts before a single
// byte {0xFF}
// return new byte[0];

// it doesn't seem right, so instead, let's append several 0xFF
// bytes
return ByteArrayUtils.combineArrays(
rowKeyPrefix,
new byte[] {
(byte) 0xFF,
(byte) 0xFF,
(byte) 0xFF,
(byte) 0xFF,
(byte) 0xFF,
(byte) 0xFF,
(byte) 0xFF});
return getNextInclusive(rowKeyPrefix);
}

final byte[] newStopRow = Arrays.copyOfRange(rowKeyPrefix, 0, offset);
Expand All @@ -381,6 +366,19 @@ public static byte[] getNextPrefix(final byte[] rowKeyPrefix) {
return newStopRow;
}

public static byte[] getNextInclusive(final byte[] rowKeyPrefix) {
return ByteArrayUtils.combineArrays(
rowKeyPrefix,
new byte[] {
(byte) 0xFF,
(byte) 0xFF,
(byte) 0xFF,
(byte) 0xFF,
(byte) 0xFF,
(byte) 0xFF,
(byte) 0xFF});
}

public static byte[] getPreviousPrefix(final byte[] rowKeyPrefix) {
int offset = rowKeyPrefix.length;
while (offset > 0) {
Expand Down Expand Up @@ -477,8 +475,8 @@ public static boolean endsWith(final byte[] bytes, final byte[] suffix) {

public static boolean matchesPrefixRanges(final byte[] bytes, final List<ByteArrayRange> ranges) {
return ranges.stream().anyMatch(range -> {
return ByteArrayUtils.compareToPrefix(bytes, range.getStart()) >= 0
&& ByteArrayUtils.compareToPrefix(bytes, range.getEnd()) <= 0;
return (ByteArrayUtils.compareToPrefix(bytes, range.getStart()) >= 0)
&& (ByteArrayUtils.compareToPrefix(bytes, range.getEnd()) <= 0);
});
}

Expand Down
Expand Up @@ -12,6 +12,7 @@
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import org.locationtech.geowave.core.index.ByteArrayUtils;
import org.locationtech.geowave.core.store.CloseableIterator;
import org.locationtech.geowave.core.store.CloseableIteratorWrapper;
import org.locationtech.geowave.core.store.entities.GeoWaveMetadata;
Expand Down Expand Up @@ -48,7 +49,7 @@ public CloseableIterator<GeoWaveMetadata> query(final MetadataQuery query) {

final boolean needsVisibility =
metadataType.isStatValues()
&& this.operations.getOptions().getBaseOptions().isVisibilityEnabled();
&& operations.getOptions().getBaseOptions().isVisibilityEnabled();
final Iterator<Map<String, AttributeValue>> iterator;
if (!query.hasPrimaryIdRanges()) {
if (query.hasPrimaryId() && query.isExact()) {
Expand Down Expand Up @@ -109,8 +110,10 @@ public CloseableIterator<GeoWaveMetadata> query(final MetadataQuery query) {
DynamoDBOperations.METADATA_PRIMARY_ID_KEY,
new Condition().withAttributeValueList(
new AttributeValue().withB(ByteBuffer.wrap(r.getStart())),
new AttributeValue().withB(ByteBuffer.wrap(r.getEnd()))).withComparisonOperator(
ComparisonOperator.BETWEEN));
new AttributeValue().withB(
ByteBuffer.wrap(
ByteArrayUtils.getNextInclusive(r.getEnd())))).withComparisonOperator(
ComparisonOperator.BETWEEN));

} else {
scan.addScanFilterEntry(
Expand Down
Expand Up @@ -304,7 +304,7 @@ private QueryRequest getQuery(
new AttributeValue().withB(ByteBuffer.wrap(partitionId))));
if (sortRange == null) {
start = ByteArrayUtils.shortToByteArray(internalAdapterId);
end = new ByteArray(start).getNextPrefix();
end = ByteArrayUtils.getNextInclusive(start);
} else if (sortRange.isSingleValue()) {
start =
ByteArrayUtils.combineArrays(
Expand All @@ -314,7 +314,7 @@ private QueryRequest getQuery(
ByteArrayUtils.combineArrays(
ByteArrayUtils.shortToByteArray(internalAdapterId),
DynamoDBUtils.encodeSortableBase64(
ByteArrayUtils.getNextPrefix(sortRange.getStart())));
ByteArrayUtils.getNextInclusive(sortRange.getStart())));
} else {
if (sortRange.getStart() == null) {
start = ByteArrayUtils.shortToByteArray(internalAdapterId);
Expand All @@ -333,9 +333,8 @@ private QueryRequest getQuery(
DynamoDBUtils.encodeSortableBase64(next(sortRange.getEnd())));
}
}
// because this is using getEndAsNextPrefix and between is inclusive on the end, it seems this
// could include an additional result on a very unlikely corner case that the end prefix exactly
// matches a key (perhaps its better to use GE and LT to force strictly less than?)
// because this DYNAMODB BETWEEN is inclusive on the end, we are using an inclusive getEnd which
// appends 0xFF instead of the typical getEndAsNextPrefix which assumes an exclusive end
query.addKeyConditionsEntry(
DynamoDBRow.GW_RANGE_KEY,
new Condition().withComparisonOperator(ComparisonOperator.BETWEEN).withAttributeValueList(
Expand Down

0 comments on commit 0aa2626

Please sign in to comment.