From 0aff768efc9d4a28114c16904af051c757c12442 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 23 Sep 2025 15:57:00 -0600 Subject: [PATCH 1/2] fix: wkb proj:geometry w/o setting geo metadata --- Cargo.toml | 1 + crates/core/Cargo.toml | 2 + crates/core/src/error.rs | 5 +++ crates/core/src/geoarrow/json.rs | 33 ++++++++++++++-- crates/core/src/geoarrow/mod.rs | 67 +++++++++++++++++++++----------- crates/core/src/geoparquet.rs | 26 +++++++++++++ 6 files changed, 108 insertions(+), 26 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f24273e6..fd7b0e9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,3 +111,4 @@ tracing-subscriber = { version = "0.3.18", features = [ tracing-indicatif = "0.3.9" url = "2.3" webpki-roots = "1.0.0" +wkb = "0.9.0" diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 976bf3c4..5b677a85 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -22,6 +22,7 @@ geoarrow = [ "dep:arrow-schema", "dep:geo-traits", "dep:geo-types", + "dep:wkb", ] geoparquet = ["geoarrow", "dep:geoparquet", "dep:parquet"] @@ -49,6 +50,7 @@ stac-derive.workspace = true thiserror.workspace = true tracing.workspace = true url = { workspace = true, features = ["serde"] } +wkb = { workspace = true, optional = true } [dev-dependencies] assert-json-diff.workspace = true diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs index b3ed2a3f..d95149db 100644 --- a/crates/core/src/error.rs +++ b/crates/core/src/error.rs @@ -103,4 +103,9 @@ pub enum Error { /// [url::ParseError] #[error(transparent)] UrlParse(#[from] url::ParseError), + + /// [wkb::error::WkbError] + #[error(transparent)] + #[cfg(feature = "geoarrow")] + Wkb(#[from] wkb::error::WkbError), } diff --git a/crates/core/src/geoarrow/json.rs b/crates/core/src/geoarrow/json.rs index 6774e59a..27834e82 100644 --- a/crates/core/src/geoarrow/json.rs +++ b/crates/core/src/geoarrow/json.rs @@ -419,10 +419,35 @@ fn set_column_for_json_rows( } } _ => { - return Err(ArrowError::JsonError(format!( - "data type {:?} not supported in nested map for json writer", - array.data_type() - ))); + if col_name == "proj:geometry" { + let binary_array = as_generic_binary_array::(array); + rows.iter_mut() + .zip(binary_array.iter()) + .filter_map(|(maybe_row, maybe_value)| { + maybe_row.as_mut().map(|row| (row, maybe_value)) + }) + .try_for_each(|(row, maybe_value)| -> Result<(), ArrowError> { + let maybe_value = maybe_value + .map(|value| -> Result<_, ArrowError> { + let wkb = wkb::reader::read_wkb(value) + .map_err(|err| ArrowError::ExternalError(Box::new(err)))?; + let value = geojson::Value::from(&wkb.to_geometry()); + Ok(value) + }) + .transpose()?; + if let Some(j) = maybe_value { + row.insert(col_name.to_string(), Value::try_from(&j).unwrap()); + } else if explicit_nulls { + row.insert(col_name.to_string(), Value::Null); + } + Ok(()) + })?; + } else { + return Err(ArrowError::JsonError(format!( + "data type {:?} not supported in nested map for json writer", + array.data_type() + ))); + } } } Ok(()) diff --git a/crates/core/src/geoarrow/mod.rs b/crates/core/src/geoarrow/mod.rs index 52d8aa33..c6b82a27 100644 --- a/crates/core/src/geoarrow/mod.rs +++ b/crates/core/src/geoarrow/mod.rs @@ -3,7 +3,10 @@ pub mod json; use crate::{Error, ItemCollection, Result}; -use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader, cast::AsArray}; +use arrow_array::{ + Array, RecordBatch, RecordBatchIterator, RecordBatchReader, builder::BinaryBuilder, + cast::AsArray, +}; use arrow_json::ReaderBuilder; use arrow_schema::{DataType, Field, SchemaBuilder, SchemaRef, TimeUnit}; use geo_types::Geometry; @@ -14,7 +17,7 @@ use geoarrow_array::{ }; use geoarrow_schema::{GeoArrowType, GeometryType, Metadata}; use serde_json::{Value, json}; -use std::{collections::HashMap, sync::Arc}; +use std::{io::Cursor, sync::Arc}; /// The stac-geoparquet version metadata key. pub const VERSION_KEY: &str = "stac:geoparquet_version"; @@ -22,9 +25,6 @@ pub const VERSION_KEY: &str = "stac:geoparquet_version"; /// The stac-geoparquet version. pub const VERSION: &str = "1.0.0"; -/// Geometry columns. -pub const GEOMETRY_COLUMNS: [&str; 2] = ["geometry", "proj:geometry"]; - /// Datetime columns. pub const DATETIME_COLUMNS: [&str; 8] = [ "datetime", @@ -79,7 +79,8 @@ impl TableBuilder { /// ``` pub fn build(self) -> Result { let mut values = Vec::with_capacity(self.item_collection.items.len()); - let mut geometry_builders = HashMap::new(); + let mut geometry_builder = GeometryBuilder::new(GeometryType::new(Default::default())); + let mut proj_geometry_builder = BinaryBuilder::new(); for item in self.item_collection.items { let mut value = @@ -88,18 +89,20 @@ impl TableBuilder { let value = value .as_object_mut() .expect("a flat item should serialize to an object"); - for key in GEOMETRY_COLUMNS { - if let Some(value) = value.remove(key) { - let entry = geometry_builders.entry(key).or_insert_with(|| { - let geometry_type = GeometryType::new(Default::default()); - GeometryBuilder::new(geometry_type) - }); - let geometry = - geojson::Geometry::from_json_value(value).map_err(Box::new)?; - entry.push_geometry(Some( - &(Geometry::try_from(geometry).map_err(Box::new)?), - ))?; - } + if let Some(value) = value.remove("geometry") { + let geometry = geojson::Geometry::from_json_value(value).map_err(Box::new)?; + geometry_builder + .push_geometry(Some(&(Geometry::try_from(geometry).map_err(Box::new)?)))?; + } + if let Some(value) = value.remove("proj:geometry") { + let geometry = geojson::Geometry::from_json_value(value).map_err(Box::new)?; + let mut cursor = Cursor::new(Vec::new()); + wkb::writer::write_geometry( + &mut cursor, + &Geometry::try_from(geometry).map_err(Box::new)?, + &Default::default(), + )?; + proj_geometry_builder.append_value(cursor.into_inner()); } if let Some(bbox) = value.remove("bbox") { let bbox = convert_bbox(bbox)?; @@ -132,10 +135,14 @@ impl TableBuilder { // Add the geometries back in. let mut schema_builder = SchemaBuilder::from(schema.fields()); let mut columns = record_batch.columns().to_vec(); - for (key, geometry_builder) in geometry_builders { - let geometry_array = geometry_builder.finish(); - columns.push(geometry_array.to_array_ref()); - schema_builder.push(geometry_array.data_type().to_field(key, true)); + let geometry_array = geometry_builder.finish(); + columns.push(geometry_array.to_array_ref()); + schema_builder.push(geometry_array.data_type().to_field("geometry", true)); + let proj_geometry_array = proj_geometry_builder.finish(); + if !proj_geometry_array.is_empty() { + let data_type = proj_geometry_array.data_type().clone(); + columns.push(Arc::new(proj_geometry_array)); + schema_builder.push(Field::new("proj:geometry", data_type, true)); } let _ = schema_builder .metadata_mut() @@ -350,4 +357,20 @@ mod tests { let record_batch = record_batches.pop().unwrap(); let _ = super::with_wkb_geometry(record_batch, "geometry").unwrap(); } + + #[test] + fn has_proj_geometry() { + let item: Item = + crate::read("examples/extensions-collection/proj-example/proj-example.json").unwrap(); + let table = Table::from_item_collection(vec![item]).unwrap(); + let (mut record_batches, _) = table.into_inner(); + assert_eq!(record_batches.len(), 1); + let record_batch = record_batches.pop().unwrap(); + assert!( + record_batch + .schema() + .column_with_name("proj:geometry") + .is_some() + ); + } } diff --git a/crates/core/src/geoparquet.rs b/crates/core/src/geoparquet.rs index ef7dd32d..1f744f16 100644 --- a/crates/core/src/geoparquet.rs +++ b/crates/core/src/geoparquet.rs @@ -372,4 +372,30 @@ mod tests { let cursor = Cursor::new(Vec::new()); super::into_writer(cursor, items).unwrap(); } + + #[test] + fn no_proj_geometry_metadata() { + let item: Item = + crate::read("examples/extensions-collection/proj-example/proj-example.json").unwrap(); + let mut cursor = Cursor::new(Vec::new()); + super::into_writer(&mut cursor, vec![item]).unwrap(); + let bytes = Bytes::from(cursor.into_inner()); + let reader = SerializedFileReader::new(bytes).unwrap(); + let key_value = reader + .metadata() + .file_metadata() + .key_value_metadata() + .unwrap() + .iter() + .find(|key_value| key_value.key == "geo") + .unwrap(); + let value: serde_json::Value = + serde_json::from_str(key_value.value.as_deref().unwrap()).unwrap(); + assert!( + !value["columns"] + .as_object() + .unwrap() + .contains_key("proj:geometry") + ); + } } From 6e99d13e51f6cd415f5008091a163957659acef6 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 23 Sep 2025 16:01:06 -0600 Subject: [PATCH 2/2] fix: clippy --- crates/core/src/geoarrow/json.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core/src/geoarrow/json.rs b/crates/core/src/geoarrow/json.rs index 27834e82..6a5739f3 100644 --- a/crates/core/src/geoarrow/json.rs +++ b/crates/core/src/geoarrow/json.rs @@ -436,7 +436,7 @@ fn set_column_for_json_rows( }) .transpose()?; if let Some(j) = maybe_value { - row.insert(col_name.to_string(), Value::try_from(&j).unwrap()); + row.insert(col_name.to_string(), Value::from(&j)); } else if explicit_nulls { row.insert(col_name.to_string(), Value::Null); }