Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
store the geopoint in three dimensions
Browse files Browse the repository at this point in the history
  • Loading branch information
irevoire committed Dec 14, 2021
1 parent 11a056d commit 98a365a
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 13 deletions.
17 changes: 16 additions & 1 deletion milli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,11 @@ pub type FieldId = u16;
pub type Position = u32;
pub type RelativePosition = u16;
pub type FieldDistribution = BTreeMap<String, u64>;
pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 2], DocumentId>;

/// A GeoPoint is a point in three-dimensional cartesian space, called `xyz_point` in the code.
/// Its metadata is a tuple composed of 1. the `DocumentId` of the associated document and
/// 2. the original point expressed in terms of latitude and longitude.
pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 3], (DocumentId, [f64; 2])>;

pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;

Expand Down Expand Up @@ -168,6 +172,17 @@ pub fn distance_between_two_points(a: &[f64; 2], b: &[f64; 2]) -> f64 {
a.haversine_distance_to(&b).meters()
}

/// Projects a `[latitude, longitude]` pair, given in degrees, onto the unit sphere and
/// returns the matching cartesian `[x, y, z]` coordinates.
pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] {
    let [lat_deg, lng_deg] = *coord;
    let lat_rad = lat_deg.to_radians();
    let lng_rad = lng_deg.to_radians();

    // `cos(lat)` scales both horizontal components, so compute it once.
    let cos_lat = lat_rad.cos();

    [cos_lat * lng_rad.cos(), cos_lat * lng_rad.sin(), lat_rad.sin()]
}

#[cfg(test)]
mod tests {
use serde_json::json;
Expand Down
8 changes: 5 additions & 3 deletions milli/src/search/criteria/geo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use rstar::RTree;

use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::{GeoPoint, Index, Result};
use crate::{lat_lng_to_xyz, GeoPoint, Index, Result};

pub struct Geo<'t> {
index: &'t Index,
Expand Down Expand Up @@ -132,10 +132,12 @@ fn geo_point(
point: [f64; 2],
ascending: bool,
) -> Box<dyn Iterator<Item = RoaringBitmap>> {
let point = lat_lng_to_xyz(&point);

let mut results = Vec::new();
for point in rtree.nearest_neighbor_iter(&point) {
if candidates.remove(point.data) {
results.push(std::iter::once(point.data).collect());
if candidates.remove(point.data.0) {
results.push(std::iter::once(point.data.0).collect());
if candidates.is_empty() {
break;
}
Expand Down
12 changes: 8 additions & 4 deletions milli/src/search/facet/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ use crate::error::{Error, UserError};
use crate::heed_codec::facet::{
FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
};
use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result};
use crate::{
distance_between_two_points, lat_lng_to_xyz, CboRoaringBitmapCodec, FieldId, Index, Result,
};

/// The maximum number of filters the filter AST can process.
const MAX_FILTER_DEPTH: usize = 2000;
Expand Down Expand Up @@ -402,12 +404,14 @@ impl<'a> Filter<'a> {
None => return Ok(RoaringBitmap::new()),
};

let xyz_base_point = lat_lng_to_xyz(&base_point);

let result = rtree
.nearest_neighbor_iter(&base_point)
.nearest_neighbor_iter(&xyz_base_point)
.take_while(|point| {
distance_between_two_points(&base_point, point.geom()) < radius
distance_between_two_points(&base_point, &point.data.1) < radius
})
.map(|point| point.data)
.map(|point| point.data.0)
.collect();

Ok(result)
Expand Down
6 changes: 3 additions & 3 deletions milli/src/update/delete_documents.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,9 +395,9 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {

let (points_to_remove, docids_to_remove): (Vec<_>, RoaringBitmap) = rtree
.iter()
.filter(|&point| self.documents_ids.contains(point.data))
.filter(|&point| self.documents_ids.contains(point.data.0))
.cloned()
.map(|point| (point, point.data))
.map(|point| (point, point.data.0))
.unzip();
points_to_remove.iter().for_each(|point| {
rtree.remove(&point);
Expand Down Expand Up @@ -747,7 +747,7 @@ mod tests {

let all_geo_ids = rtree.iter().map(|point| point.data).collect::<Vec<_>>();
let all_geo_documents = index
.documents(&rtxn, all_geo_ids.iter().copied())
.documents(&rtxn, all_geo_ids.iter().map(|(id, _)| id).copied())
.unwrap()
.iter()
.map(|(id, _)| *id)
Expand Down
9 changes: 7 additions & 2 deletions milli/src/update/index_documents/typed_chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ use super::helpers::{
};
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
use crate::update::index_documents::helpers::into_clonable_grenad;
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
use crate::{
lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
Result,
};

pub(crate) enum TypedChunk {
DocidWordPositions(grenad::Reader<CursorClonableMmap>),
Expand Down Expand Up @@ -192,7 +195,9 @@ pub(crate) fn write_typed_chunk_into_index(
let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap();
let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
rtree.insert(GeoPoint::new(point, docid));
let xyz_point = lat_lng_to_xyz(&point);

rtree.insert(GeoPoint::new(xyz_point, (docid, point)));
geo_faceted_docids.insert(docid);
}
index.put_geo_rtree(wtxn, &rtree)?;
Expand Down

0 comments on commit 98a365a

Please sign in to comment.