diff --git a/Cargo.toml b/Cargo.toml index 48a0fe01..76ff87c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,11 +52,11 @@ futures = "0.3.31" futures-core = "0.3.31" futures-util = "0.3.31" geo = "0.30.0" -geo-traits = "0.2.0" -geo-types = "0.7.15" -geoarrow-array = { git = "https://github.com/geoarrow/geoarrow-rs/", rev = "17bf33e4cf78b060afa08ca9560dc4efd73c2c76" } -geoarrow-geoparquet = { git = "https://github.com/geoarrow/geoarrow-rs/", rev = "17bf33e4cf78b060afa08ca9560dc4efd73c2c76" } -geoarrow-schema = { git = "https://github.com/geoarrow/geoarrow-rs/", rev = "17bf33e4cf78b060afa08ca9560dc4efd73c2c76" } +geo-traits = "0.3.0" +geo-types = "0.7.16" +geoarrow-array = { git = "https://github.com/geoarrow/geoarrow-rs/", rev = "d27500849c6cee019535d6749991d1fd122baecf" } +geoparquet = { git = "https://github.com/geoarrow/geoarrow-rs/", rev = "d27500849c6cee019535d6749991d1fd122baecf" } +geoarrow-schema = { git = "https://github.com/geoarrow/geoarrow-rs/", rev = "d27500849c6cee019535d6749991d1fd122baecf" } geojson = "0.24.1" getrandom = { version = "0.3.3", features = ["wasm_js"] } http = "1.1" diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 586d1542..861ae376 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -23,8 +23,8 @@ geoarrow = [ "dep:geo-traits", "dep:geo-types", ] -geoparquet = ["geoarrow", "dep:geoarrow-geoparquet", "dep:parquet"] -geoparquet-compression = ["geoparquet", "geoarrow-geoparquet/compression"] +geoparquet = ["geoarrow", "dep:geoparquet", "dep:parquet"] +geoparquet-compression = ["geoparquet", "geoparquet/compression"] [dependencies] arrow-array = { workspace = true, optional = true, features = ["chrono-tz"] } @@ -37,9 +37,9 @@ geo = { workspace = true, optional = true } geo-traits = { workspace = true, optional = true } geo-types = { workspace = true, optional = true } geoarrow-array = { workspace = true, optional = true } -geoarrow-geoparquet = { workspace = true, optional = true } geoarrow-schema = { workspace = true, optional = true } geojson.workspace = true +geoparquet = { workspace = true, optional = true } indexmap.workspace = true log.workspace = true mime.workspace = true diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs index b93d135e..f4076eee 100644 --- a/crates/core/src/error.rs +++ b/crates/core/src/error.rs @@ -18,10 +18,10 @@ pub enum Error { #[error("{0} is not enabled")] FeatureNotEnabled(&'static str), - /// [geoarrow_array::error::GeoArrowError] + /// [geoarrow_schema::error::GeoArrowError] #[error(transparent)] #[cfg(feature = "geoarrow")] - GeoArrow(#[from] geoarrow_array::error::GeoArrowError), + GeoArrow(#[from] geoarrow_schema::error::GeoArrowError), /// [geojson::Error] #[error(transparent)] diff --git a/crates/core/src/geoarrow/json.rs b/crates/core/src/geoarrow/json.rs index 6ad0ca61..6774e59a 100644 --- a/crates/core/src/geoarrow/json.rs +++ b/crates/core/src/geoarrow/json.rs @@ -39,8 +39,7 @@ const TOP_LEVEL_KEYS: [&str; 10] = [ ]; use crate::Error; -use arrow_array::RecordBatchReader; -use arrow_array::{cast::*, types::*, *}; +use arrow_array::{RecordBatchReader, cast::*, types::*, *}; use arrow_cast::display::{ArrayFormatter, FormatOptions}; use arrow_json::JsonSerializable; use arrow_schema::*; @@ -49,9 +48,10 @@ use geo_traits::to_geo::{ ToGeoGeometry, ToGeoGeometryCollection, ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPoint, ToGeoMultiPolygon, ToGeoPoint, ToGeoPolygon, ToGeoRect, }; -use geoarrow_array::array::from_arrow_array; -use geoarrow_array::cast::AsGeoArrowArray; -use geoarrow_array::{ArrayAccessor, GeoArrowArray, GeoArrowType}; +use geoarrow_array::{ + GeoArrowArray, GeoArrowArrayAccessor, array::from_arrow_array, cast::AsGeoArrowArray, +}; +use geoarrow_schema::GeoArrowType; use serde_json::{Value, json, map::Map as JsonMap}; use std::{iter, sync::Arc}; @@ -469,6 +469,8 @@ fn set_geometry_column_for_json_rows( LargeWkb(_) => geojson::Value::from(&array.as_wkb::().value(i)?.to_geometry()), Wkt(_) => geojson::Value::from(&array.as_wkt::().value(i)?.to_geometry()), LargeWkt(_) => geojson::Value::from(&array.as_wkt::().value(i)?.to_geometry()), + WktView(_) => geojson::Value::from(&array.as_wkt_view().value(i)?.to_geometry()), + WkbView(_) => geojson::Value::from(&array.as_wkb_view().value(i)?.to_geometry()), }; let _ = row.insert( col_name.to_string(), diff --git a/crates/core/src/geoarrow/mod.rs b/crates/core/src/geoarrow/mod.rs index 911088c6..13ce978b 100644 --- a/crates/core/src/geoarrow/mod.rs +++ b/crates/core/src/geoarrow/mod.rs @@ -8,11 +8,11 @@ use arrow_json::ReaderBuilder; use arrow_schema::{DataType, Field, SchemaBuilder, SchemaRef, TimeUnit}; use geo_types::Geometry; use geoarrow_array::{ - GeoArrowArray, GeoArrowType, + GeoArrowArray, array::{WkbArray, from_arrow_array}, builder::GeometryBuilder, }; -use geoarrow_schema::{CoordType, GeometryType, Metadata}; +use geoarrow_schema::{CoordType, GeoArrowType, GeometryType, Metadata}; use serde_json::{Value, json}; use std::{collections::HashMap, sync::Arc}; @@ -322,7 +322,7 @@ pub fn add_wkb_metadata(mut record_batch: RecordBatch, column_name: &str) -> Res mod tests { use super::Table; use crate::{Item, ItemCollection}; - use geoarrow_geoparquet::GeoParquetRecordBatchReaderBuilder; + use geoparquet::GeoParquetRecordBatchReaderBuilder; use std::fs::File; #[test] diff --git a/crates/core/src/geoparquet.rs b/crates/core/src/geoparquet.rs index 974bc7c6..038f1d7c 100644 --- a/crates/core/src/geoparquet.rs +++ b/crates/core/src/geoparquet.rs @@ -5,7 +5,7 @@ use crate::{ geoarrow::{Table, VERSION, VERSION_KEY}, }; use bytes::Bytes; -use geoarrow_geoparquet::{GeoParquetRecordBatchReaderBuilder, GeoParquetWriterOptions}; +use geoparquet::{GeoParquetRecordBatchReaderBuilder, GeoParquetWriterOptions}; use parquet::{ file::{properties::WriterProperties, reader::ChunkReader}, format::KeyValue, @@ -53,7 +53,7 @@ pub fn into_writer(writer: W, item_collection: impl Into) -> where W: Write + Send, { - into_writer_with_options(writer, item_collection, &Default::default()) + into_writer_with_options(writer, item_collection, Default::default()) } /// Writes a [ItemCollection] to a [std::io::Write] as @@ -89,7 +89,7 @@ where }])) .build(); options.writer_properties = Some(writer_properties); - into_writer_with_options(writer, item_collection, &options) + into_writer_with_options(writer, item_collection, options) } /// Writes a [ItemCollection] to a [std::io::Write] as @@ -103,18 +103,25 @@ where /// /// let item: Item = stac::read("examples/simple-item.json").unwrap(); /// let mut cursor = Cursor::new(Vec::new()); -/// stac::geoparquet::into_writer_with_options(&mut cursor, vec![item], &Default::default()).unwrap(); +/// stac::geoparquet::into_writer_with_options(&mut cursor, vec![item], Default::default()).unwrap(); /// ``` pub fn into_writer_with_options( writer: W, item_collection: impl Into, - options: &GeoParquetWriterOptions, + mut options: GeoParquetWriterOptions, ) -> Result<()> where W: Write + Send, { + if let Some(primary_column) = options.primary_column.as_deref() { + if primary_column != "geometry" { + log::warn!("primary column not set to 'geometry'"); + } + } else { + options.primary_column = Some("geometry".to_string()); + } let table = Table::from_item_collection(item_collection)?; - geoarrow_geoparquet::write_geoparquet(Box::new(table.into_reader()), writer, options)?; + geoparquet::write_geoparquet(Box::new(table.into_reader()), writer, &options)?; Ok(()) } /// Create a STAC object from geoparquet data. @@ -255,6 +262,7 @@ impl IntoGeoparquet for serde_json::Value { mod tests { use crate::{FromGeoparquet, Item, ItemCollection, SelfHref, Value}; use bytes::Bytes; + use parquet::file::reader::{FileReader, SerializedFileReader}; use std::{ fs::File, io::{Cursor, Read}, @@ -296,6 +304,27 @@ mod tests { assert_eq!(item_collection.items.len(), 2); } + #[test] + fn geometry_primary_column() { + // https://github.com/stac-utils/rustac/issues/755 + let item_collection: ItemCollection = crate::read("data/multi-polygons.json").unwrap(); + let mut cursor = Cursor::new(Vec::new()); + super::into_writer(&mut cursor, item_collection).unwrap(); + let bytes = Bytes::from(cursor.into_inner()); + let reader = SerializedFileReader::new(bytes).unwrap(); + let key_value = reader + .metadata() + .file_metadata() + .key_value_metadata() + .unwrap() + .into_iter() + .find(|key_value| key_value.key == "geo") + .unwrap(); + let value: serde_json::Value = + serde_json::from_str(key_value.value.as_deref().unwrap()).unwrap(); + assert_eq!(value["primary_column"], "geometry"); + } + #[test] fn from_bytes() { let mut buf = Vec::new(); diff --git a/crates/duckdb/Cargo.toml b/crates/duckdb/Cargo.toml index cc974ebb..6c040d3d 100644 --- a/crates/duckdb/Cargo.toml +++ b/crates/duckdb/Cargo.toml @@ -21,7 +21,7 @@ chrono.workspace = true cql2.workspace = true duckdb.workspace = true geo.workspace = true -geoarrow-array = { workspace = true } +geoarrow-schema = { workspace = true } geojson.workspace = true getrandom.workspace = true log.workspace = true diff --git a/crates/duckdb/src/error.rs b/crates/duckdb/src/error.rs index b38c7420..c28b0205 100644 --- a/crates/duckdb/src/error.rs +++ b/crates/duckdb/src/error.rs @@ -16,9 +16,9 @@ pub enum Error { #[error(transparent)] DuckDB(#[from] duckdb::Error), - /// [geoarrow_array::error::GeoArrowError] + /// [geoarrow_schema::error::GeoArrowError] #[error(transparent)] - GeoArrow(#[from] geoarrow_array::error::GeoArrowError), + GeoArrow(#[from] geoarrow_schema::error::GeoArrowError), /// [serde_json::Error] #[error(transparent)]