diff --git a/Cargo.lock b/Cargo.lock index 51c4e74cb..8abad6a22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3043,9 +3043,9 @@ dependencies = [ [[package]] name = "sqlite-hashes" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "560fafa797fc0f0c1e54bb9c1f01e9d8f45f7771608d87c95ebec3cf9a3b9f45" +checksum = "73f60eaba33ed5ad1e3cceb991e04cf10ecd13ebe7f651c4dd5d346efc371b9a" dependencies = [ "digest", "md-5", diff --git a/Cargo.toml b/Cargo.toml index 0792e0ad0..babc63d71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,7 @@ serde = { version = "1", features = ["derive"] } serde_json = "1" serde_yaml = "0.9" spreet = { version = "0.8", default-features = false } -sqlite-hashes = "0.2" +sqlite-hashes = "0.3" sqlx = { version = "0.7", features = ["sqlite"] } subst = { version = "0.2", features = ["yaml"] } thiserror = "1" diff --git a/martin-mbtiles/.sqlx/query-8450267c60816a13cc70ee0ff635917961ef736c030ac577a487091a9724049d.json b/martin-mbtiles/.sqlx/query-8450267c60816a13cc70ee0ff635917961ef736c030ac577a487091a9724049d.json new file mode 100644 index 000000000..74adbbdcd --- /dev/null +++ b/martin-mbtiles/.sqlx/query-8450267c60816a13cc70ee0ff635917961ef736c030ac577a487091a9724049d.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "INSERT OR REPLACE INTO metadata(name, value) VALUES('global_hash', ?)", + "describe": { + "columns": [], + "parameters": { + "Right": 1 + }, + "nullable": [] + }, + "hash": "8450267c60816a13cc70ee0ff635917961ef736c030ac577a487091a9724049d" +} diff --git a/martin-mbtiles/src/bin/main.rs b/martin-mbtiles/src/bin/main.rs index b0b573035..6e5e663a5 100644 --- a/martin-mbtiles/src/bin/main.rs +++ b/martin-mbtiles/src/bin/main.rs @@ -2,7 +2,9 @@ use std::path::{Path, PathBuf}; use anyhow::Result; use clap::{Parser, Subcommand}; -use martin_mbtiles::{apply_mbtiles_diff, copy_mbtiles_file, Mbtiles, TileCopierOptions}; +use martin_mbtiles::{ + apply_mbtiles_diff, copy_mbtiles_file, IntegrityCheck, Mbtiles, TileCopierOptions, +}; use sqlx::sqlite::SqliteConnectOptions; use sqlx::{Connection, SqliteConnection}; @@ -62,6 +64,12 @@ enum Commands { Validate { /// MBTiles file to validate file: PathBuf, + /// Value to specify the extent of the SQLite integrity check performed + #[arg(long, value_enum, default_value_t=IntegrityCheck::default())] + integrity_check: IntegrityCheck, + /// Generate a hash of the tile data hashes and store under the 'global_hash' key in metadata + #[arg(long)] + regenerate_global_hash: bool, }, } @@ -85,8 +93,12 @@ async fn main() -> Result<()> { } => { apply_mbtiles_diff(src_file, diff_file).await?; } - Commands::Validate { file } => { - validate_mbtiles(file.as_path()).await?; + Commands::Validate { + file, + integrity_check, + regenerate_global_hash, + } => { + validate_mbtiles(file.as_path(), integrity_check, regenerate_global_hash).await?; } } @@ -111,11 +123,18 @@ async fn meta_set_value(file: &Path, key: &str, value: Option) -> Result Ok(()) } -async fn validate_mbtiles(file: &Path) -> Result<()> { +async fn validate_mbtiles( + file: &Path, + integrity_check: IntegrityCheck, + regenerate_global_hash: bool, +) -> Result<()> { let mbt = Mbtiles::new(file)?; let opt = SqliteConnectOptions::new().filename(file).read_only(true); let mut conn = SqliteConnection::connect_with(&opt).await?; - mbt.validate_mbtiles(&mut conn).await?; + mbt.validate_mbtiles(integrity_check, &mut conn).await?; + if regenerate_global_hash { + mbt.generate_global_hash(&mut conn).await?; + } Ok(()) } @@ -127,8 +146,8 @@ mod tests { use clap::Parser; use martin_mbtiles::{CopyDuplicateMode, TileCopierOptions}; - use crate::Args; use crate::Commands::{ApplyDiff, Copy, MetaGetValue, MetaSetValue, Validate}; + use crate::{Args, IntegrityCheck}; #[test] fn test_copy_no_arguments() { @@ -407,6 +426,8 @@ mod tests { verbose: false, command: Validate { file: PathBuf::from("src_file"), + integrity_check: IntegrityCheck::Quick, + regenerate_global_hash: false } } ); diff --git a/martin-mbtiles/src/errors.rs b/martin-mbtiles/src/errors.rs index cdcf21f2a..96f46d883 100644 --- a/martin-mbtiles/src/errors.rs +++ b/martin-mbtiles/src/errors.rs @@ -3,8 +3,6 @@ use std::path::PathBuf; use martin_tile_utils::TileInfo; use sqlite_hashes::rusqlite; -use crate::mbtiles::MbtType; - #[derive(thiserror::Error, Debug)] pub enum MbtError { #[error("SQL Error {0}")] @@ -22,11 +20,14 @@ pub enum MbtError { #[error("Invalid data format for MBTile file {0}")] InvalidDataFormat(String), + #[error("Integrity check failed for MBTile file {0} for the following reasons: \n {1:?}")] + FailedIntegrityCheck(String, Vec), + #[error("Invalid tile data for MBTile file {0}")] InvalidTileData(String), - #[error("Incorrect data format for MBTile file {0}; expected {1:?} and got {2:?}")] - IncorrectDataFormat(String, &'static [MbtType], MbtType), + #[error("Expected global_hash value in metadata table for MBTiles file {0}")] + GlobalHashValueNotFound(String), #[error(r#"Filename "{0}" passed to SQLite must be valid UTF-8"#)] InvalidFilenameType(PathBuf), diff --git a/martin-mbtiles/src/lib.rs b/martin-mbtiles/src/lib.rs index 59eaf2e3d..e6f63d29f 100644 --- a/martin-mbtiles/src/lib.rs +++ b/martin-mbtiles/src/lib.rs @@ -7,7 +7,7 @@ mod mbtiles_queries; mod tile_copier; pub use errors::MbtError; -pub use mbtiles::{Mbtiles, Metadata}; +pub use mbtiles::{IntegrityCheck, Mbtiles, Metadata}; pub use mbtiles_pool::MbtilesPool; pub use tile_copier::{ apply_mbtiles_diff, copy_mbtiles_file, CopyDuplicateMode, TileCopierOptions, diff --git a/martin-mbtiles/src/mbtiles.rs b/martin-mbtiles/src/mbtiles.rs index 380772a90..4c6db100e 100644 --- a/martin-mbtiles/src/mbtiles.rs +++ b/martin-mbtiles/src/mbtiles.rs @@ -16,6 +16,7 @@ use martin_tile_utils::{Format, TileInfo}; use serde_json::{Value as JSONValue, Value}; use sqlite_hashes::register_md5_function; use sqlite_hashes::rusqlite::Connection as RusqliteConnection; +use sqlx::sqlite::SqliteRow; use sqlx::{query, Row, SqliteExecutor}; use tilejson::{tilejson, Bounds, Center, TileJSON}; @@ -23,7 +24,7 @@ use crate::errors::{MbtError, MbtResult}; use crate::mbtiles_queries::{ is_flat_tables_type, is_flat_with_hash_tables_type, is_normalized_tables_type, }; -use crate::MbtError::{IncorrectDataFormat, InvalidTileData}; +use crate::MbtError::{FailedIntegrityCheck, GlobalHashValueNotFound, InvalidTileData}; #[derive(Clone, Debug, PartialEq)] pub struct Metadata { @@ -42,6 +43,15 @@ pub enum MbtType { Normalized, } +#[derive(PartialEq, Eq, Default, Debug, Clone)] +#[cfg_attr(feature = "cli", derive(ValueEnum))] +pub enum IntegrityCheck { + Full, + #[default] + Quick, + Off, +} + #[derive(Clone, Debug)] pub struct Mbtiles { filepath: String, @@ -378,33 +388,102 @@ impl Mbtiles { Err(MbtError::NoUniquenessConstraint(self.filepath.clone())) } - pub async fn validate_mbtiles(&self, conn: &mut T) -> MbtResult<()> + async fn get_global_hash(&self, conn: &mut T) -> MbtResult> where for<'e> &'e mut T: SqliteExecutor<'e>, { + let rusqlite_conn = RusqliteConnection::open(Path::new(&self.filepath()))?; + register_md5_function(&rusqlite_conn)?; let mbttype = self.detect_type(&mut *conn).await?; let sql = match mbttype { MbtType::Flat => { - return Err(IncorrectDataFormat( - self.filepath().to_string(), - &[MbtType::FlatWithHash, MbtType::Normalized], - MbtType::Flat, - )); - } - MbtType::FlatWithHash => { - "SELECT * FROM tiles_with_hash WHERE tile_hash!=hex(md5(tile_data)) LIMIT 1;" + println!("Cannot generate global hash, no hash column in flat table format. Skipping global_hash generation..."); + return Ok(None); } - MbtType::Normalized => { - "SELECT * FROM images WHERE tile_id!=hex(md5(tile_data)) LIMIT 1;" + MbtType::FlatWithHash => "SELECT hex(md5_concat(cast(zoom_level AS text), cast(tile_column AS text), cast(tile_row AS text), tile_hash)) FROM tiles_with_hash ORDER BY zoom_level, tile_column, tile_row;", + MbtType::Normalized => "SELECT hex(md5_concat(cast(zoom_level AS text), cast(tile_column AS text), cast(tile_row AS text), tile_id)) FROM map ORDER BY zoom_level, tile_column, tile_row;" + }; + + Ok(Some(rusqlite_conn.query_row_and_then(sql, [], |row| { + row.get::<_, String>(0) + })?)) + } + + pub async fn generate_global_hash(&self, conn: &mut T) -> MbtResult<()> + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + if let Some(global_hash) = self.get_global_hash(&mut *conn).await? { + self.set_metadata_value(conn, "global_hash", Some(global_hash)) + .await + } else { + Ok(()) + } + } + + pub async fn validate_mbtiles( + &self, + integrity_check: IntegrityCheck, + conn: &mut T, + ) -> MbtResult<()> + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + // SQLite Integrity check + if integrity_check != IntegrityCheck::Off { + let sql = if integrity_check == IntegrityCheck::Full { + "PRAGMA integrity_check;" + } else { + "PRAGMA quick_check;" + }; + + let result = query(sql) + .map(|row: SqliteRow| row.get::(0)) + .fetch_all(&mut *conn) + .await?; + + if result.len() > 1 + || result.get(0).ok_or(FailedIntegrityCheck( + self.filepath().to_string(), + vec!["SQLite could not perform integrity check".to_string()], + ))? != "ok" + { + return Err(FailedIntegrityCheck(self.filepath().to_string(), result)); } } - .to_string(); + + let mbttype = self.detect_type(&mut *conn).await?; + + if mbttype == MbtType::Flat { + println!( + "No hash column in flat table format, skipping hash-based validation steps..." + ); + return Ok(()); + } let rusqlite_conn = RusqliteConnection::open(Path::new(self.filepath()))?; register_md5_function(&rusqlite_conn)?; - if rusqlite_conn.prepare(&sql)?.exists(())? { + // Global hash check + if let Some(global_hash) = self.get_metadata_value(&mut *conn, "global_hash").await? { + if let Some(new_global_hash) = self.get_global_hash(&mut *conn).await? { + if global_hash != new_global_hash { + return Err(InvalidTileData(self.filepath().to_string())); + } + } + } else { + return Err(GlobalHashValueNotFound(self.filepath().to_string())); + } + + // Per-tile hash check + let sql = if mbttype == MbtType::FlatWithHash { + "SELECT 1 FROM tiles_with_hash WHERE tile_hash != hex(md5(tile_data)) LIMIT 1;" + } else { + "SELECT 1 FROM images WHERE tile_id != hex(md5(tile_data)) LIMIT 1;" + }; + + if rusqlite_conn.prepare(sql)?.exists(())? { return Err(InvalidTileData(self.filepath().to_string())); } @@ -569,7 +648,9 @@ mod tests { async fn validate_valid_file() { let (mut conn, mbt) = open("../tests/fixtures/files/zoomed_world_cities.mbtiles").await; - mbt.validate_mbtiles(&mut conn).await.unwrap(); + mbt.validate_mbtiles(IntegrityCheck::Quick, &mut conn) + .await + .unwrap(); } #[actix_rt::test] @@ -577,8 +658,14 @@ mod tests { let (mut conn, mbt) = open("../tests/fixtures/files/invalid_zoomed_world_cities.mbtiles").await; + print!( + "VLAUE {:?}", + mbt.validate_mbtiles(IntegrityCheck::Quick, &mut conn).await + ); assert!(matches!( - mbt.validate_mbtiles(&mut conn).await.unwrap_err(), + mbt.validate_mbtiles(IntegrityCheck::Quick, &mut conn) + .await + .unwrap_err(), MbtError::InvalidTileData(..) )); } diff --git a/martin-mbtiles/src/tile_copier.rs b/martin-mbtiles/src/tile_copier.rs index b7478c6f2..45707a383 100644 --- a/martin-mbtiles/src/tile_copier.rs +++ b/martin-mbtiles/src/tile_copier.rs @@ -31,11 +31,11 @@ pub struct TileCopierOptions { src_file: PathBuf, /// MBTiles file to write to dst_file: PathBuf, - /// TODO: add documentation Output format of the destination file, ignored if the file exists. if not specified, defaults to the type of source + /// Output format of the destination file, ignored if the file exists. If not specified, defaults to the type of source #[cfg_attr(feature = "cli", arg(long, value_enum))] dst_mbttype: Option, /// Specify copying behaviour when tiles with duplicate (zoom_level, tile_column, tile_row) values are found - #[cfg_attr(feature = "cli", arg(long, value_enum, default_value_t = CopyDuplicateMode::Override))] + #[cfg_attr(feature = "cli", arg(long, value_enum, default_value_t = CopyDuplicateMode::default()))] on_duplicate: CopyDuplicateMode, /// Minimum zoom level to copy #[cfg_attr(feature = "cli", arg(long, conflicts_with("zoom_levels")))] @@ -49,6 +49,9 @@ pub struct TileCopierOptions { /// Compare source file with this file, and only copy non-identical tiles to destination #[cfg_attr(feature = "cli", arg(long))] diff_with_file: Option, + /// Skip generating a global hash for mbtiles validation. By default, if dst_mbttype is flat-with-hash or normalized, generate a global hash and store in the metadata table + #[cfg_attr(feature = "cli", arg(long))] + skip_global_hash: bool, } #[cfg(feature = "cli")] @@ -101,6 +104,7 @@ impl TileCopierOptions { min_zoom: None, max_zoom: None, diff_with_file: None, + skip_global_hash: false, } } @@ -133,6 +137,11 @@ impl TileCopierOptions { self.diff_with_file = Some(diff_with_file); self } + + pub fn skip_global_hash(mut self, skip_global_hash: bool) -> Self { + self.skip_global_hash = skip_global_hash; + self + } } impl TileCopier { @@ -226,6 +235,12 @@ impl TileCopier { } }; + if !self.options.skip_global_hash + && (dst_mbttype == FlatWithHash || dst_mbttype == Normalized) + { + self.dst_mbtiles.generate_global_hash(&mut conn).await?; + } + Ok(conn) } diff --git a/tests/fixtures/files/invalid_zoomed_world_cities.mbtiles b/tests/fixtures/files/invalid_zoomed_world_cities.mbtiles index 9e0e31c0c..3012ef4b1 100644 Binary files a/tests/fixtures/files/invalid_zoomed_world_cities.mbtiles and b/tests/fixtures/files/invalid_zoomed_world_cities.mbtiles differ diff --git a/tests/fixtures/files/zoomed_world_cities.mbtiles b/tests/fixtures/files/zoomed_world_cities.mbtiles index 9edf9d4fd..bc33764db 100644 Binary files a/tests/fixtures/files/zoomed_world_cities.mbtiles and b/tests/fixtures/files/zoomed_world_cities.mbtiles differ