Skip to content

Commit

Permalink
Add integrity and global hash check to validation (#851)
Browse files Browse the repository at this point in the history
Co-authored-by: rstanciu <rstanciu@rivian.com>
  • Loading branch information
upsicleclown and rstanciu committed Aug 31, 2023
1 parent 26e1da8 commit 875a942
Show file tree
Hide file tree
Showing 10 changed files with 168 additions and 32 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_yaml = "0.9"
spreet = { version = "0.8", default-features = false }
sqlite-hashes = "0.2"
sqlite-hashes = "0.3"
sqlx = { version = "0.7", features = ["sqlite"] }
subst = { version = "0.2", features = ["yaml"] }
thiserror = "1"
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 27 additions & 6 deletions martin-mbtiles/src/bin/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use std::path::{Path, PathBuf};

use anyhow::Result;
use clap::{Parser, Subcommand};
use martin_mbtiles::{apply_mbtiles_diff, copy_mbtiles_file, Mbtiles, TileCopierOptions};
use martin_mbtiles::{
apply_mbtiles_diff, copy_mbtiles_file, IntegrityCheck, Mbtiles, TileCopierOptions,
};
use sqlx::sqlite::SqliteConnectOptions;
use sqlx::{Connection, SqliteConnection};

Expand Down Expand Up @@ -62,6 +64,12 @@ enum Commands {
Validate {
/// MBTiles file to validate
file: PathBuf,
/// Value to specify the extent of the SQLite integrity check performed
#[arg(long, value_enum, default_value_t=IntegrityCheck::default())]
integrity_check: IntegrityCheck,
/// Generate a hash of the tile data hashes and store under the 'global_hash' key in metadata
#[arg(long)]
regenerate_global_hash: bool,
},
}

Expand All @@ -85,8 +93,12 @@ async fn main() -> Result<()> {
} => {
apply_mbtiles_diff(src_file, diff_file).await?;
}
Commands::Validate { file } => {
validate_mbtiles(file.as_path()).await?;
Commands::Validate {
file,
integrity_check,
regenerate_global_hash,
} => {
validate_mbtiles(file.as_path(), integrity_check, regenerate_global_hash).await?;
}
}

Expand All @@ -111,11 +123,18 @@ async fn meta_set_value(file: &Path, key: &str, value: Option<String>) -> Result
Ok(())
}

async fn validate_mbtiles(file: &Path) -> Result<()> {
async fn validate_mbtiles(
file: &Path,
integrity_check: IntegrityCheck,
regenerate_global_hash: bool,
) -> Result<()> {
let mbt = Mbtiles::new(file)?;
let opt = SqliteConnectOptions::new().filename(file).read_only(true);
let mut conn = SqliteConnection::connect_with(&opt).await?;
mbt.validate_mbtiles(&mut conn).await?;
mbt.validate_mbtiles(integrity_check, &mut conn).await?;
if regenerate_global_hash {
mbt.generate_global_hash(&mut conn).await?;
}
Ok(())
}

Expand All @@ -127,8 +146,8 @@ mod tests {
use clap::Parser;
use martin_mbtiles::{CopyDuplicateMode, TileCopierOptions};

use crate::Args;
use crate::Commands::{ApplyDiff, Copy, MetaGetValue, MetaSetValue, Validate};
use crate::{Args, IntegrityCheck};

#[test]
fn test_copy_no_arguments() {
Expand Down Expand Up @@ -407,6 +426,8 @@ mod tests {
verbose: false,
command: Validate {
file: PathBuf::from("src_file"),
integrity_check: IntegrityCheck::Quick,
regenerate_global_hash: false
}
}
);
Expand Down
9 changes: 5 additions & 4 deletions martin-mbtiles/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ use std::path::PathBuf;
use martin_tile_utils::TileInfo;
use sqlite_hashes::rusqlite;

use crate::mbtiles::MbtType;

#[derive(thiserror::Error, Debug)]
pub enum MbtError {
#[error("SQL Error {0}")]
Expand All @@ -22,11 +20,14 @@ pub enum MbtError {
#[error("Invalid data format for MBTile file {0}")]
InvalidDataFormat(String),

#[error("Integrity check failed for MBTile file {0} for the following reasons: \n {1:?}")]
FailedIntegrityCheck(String, Vec<String>),

#[error("Invalid tile data for MBTile file {0}")]
InvalidTileData(String),

#[error("Incorrect data format for MBTile file {0}; expected {1:?} and got {2:?}")]
IncorrectDataFormat(String, &'static [MbtType], MbtType),
#[error("Expected global_hash value in metadata table for MBTiles file {0}")]
GlobalHashValueNotFound(String),

#[error(r#"Filename "{0}" passed to SQLite must be valid UTF-8"#)]
InvalidFilenameType(PathBuf),
Expand Down
2 changes: 1 addition & 1 deletion martin-mbtiles/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ mod mbtiles_queries;
mod tile_copier;

pub use errors::MbtError;
pub use mbtiles::{Mbtiles, Metadata};
pub use mbtiles::{IntegrityCheck, Mbtiles, Metadata};
pub use mbtiles_pool::MbtilesPool;
pub use tile_copier::{
apply_mbtiles_diff, copy_mbtiles_file, CopyDuplicateMode, TileCopierOptions,
Expand Down
119 changes: 103 additions & 16 deletions martin-mbtiles/src/mbtiles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@ use martin_tile_utils::{Format, TileInfo};
use serde_json::{Value as JSONValue, Value};
use sqlite_hashes::register_md5_function;
use sqlite_hashes::rusqlite::Connection as RusqliteConnection;
use sqlx::sqlite::SqliteRow;
use sqlx::{query, Row, SqliteExecutor};
use tilejson::{tilejson, Bounds, Center, TileJSON};

use crate::errors::{MbtError, MbtResult};
use crate::mbtiles_queries::{
is_flat_tables_type, is_flat_with_hash_tables_type, is_normalized_tables_type,
};
use crate::MbtError::{IncorrectDataFormat, InvalidTileData};
use crate::MbtError::{FailedIntegrityCheck, GlobalHashValueNotFound, InvalidTileData};

#[derive(Clone, Debug, PartialEq)]
pub struct Metadata {
Expand All @@ -42,6 +43,15 @@ pub enum MbtType {
Normalized,
}

/// Extent of the SQLite integrity check performed while validating an MBTiles file.
///
/// The type is a plain fieldless enum, so it is `Copy` and can be passed by value
/// without being consumed.
#[derive(PartialEq, Eq, Default, Debug, Clone, Copy)]
#[cfg_attr(feature = "cli", derive(ValueEnum))]
pub enum IntegrityCheck {
    /// Run `PRAGMA integrity_check`.
    Full,
    /// Run `PRAGMA quick_check`; this is the default.
    #[default]
    Quick,
    /// Do not run any SQLite integrity check.
    Off,
}

#[derive(Clone, Debug)]
pub struct Mbtiles {
filepath: String,
Expand Down Expand Up @@ -378,33 +388,102 @@ impl Mbtiles {
Err(MbtError::NoUniquenessConstraint(self.filepath.clone()))
}

pub async fn validate_mbtiles<T>(&self, conn: &mut T) -> MbtResult<()>
async fn get_global_hash<T>(&self, conn: &mut T) -> MbtResult<Option<String>>
where
for<'e> &'e mut T: SqliteExecutor<'e>,
{
let rusqlite_conn = RusqliteConnection::open(Path::new(&self.filepath()))?;
register_md5_function(&rusqlite_conn)?;
let mbttype = self.detect_type(&mut *conn).await?;

let sql = match mbttype {
MbtType::Flat => {
return Err(IncorrectDataFormat(
self.filepath().to_string(),
&[MbtType::FlatWithHash, MbtType::Normalized],
MbtType::Flat,
));
}
MbtType::FlatWithHash => {
"SELECT * FROM tiles_with_hash WHERE tile_hash!=hex(md5(tile_data)) LIMIT 1;"
println!("Cannot generate global hash, no hash column in flat table format. Skipping global_hash generation...");
return Ok(None);
}
MbtType::Normalized => {
"SELECT * FROM images WHERE tile_id!=hex(md5(tile_data)) LIMIT 1;"
MbtType::FlatWithHash => "SELECT hex(md5_concat(cast(zoom_level AS text), cast(tile_column AS text), cast(tile_row AS text), tile_hash)) FROM tiles_with_hash ORDER BY zoom_level, tile_column, tile_row;",
MbtType::Normalized => "SELECT hex(md5_concat(cast(zoom_level AS text), cast(tile_column AS text), cast(tile_row AS text), tile_id)) FROM map ORDER BY zoom_level, tile_column, tile_row;"
};

Ok(Some(rusqlite_conn.query_row_and_then(sql, [], |row| {
row.get::<_, String>(0)
})?))
}

/// Compute the global hash and store it under the `global_hash` metadata key.
///
/// When `get_global_hash` yields no value, nothing is written and `Ok(())` is returned.
///
/// # Errors
/// Propagates errors from computing the hash and from writing the metadata value.
pub async fn generate_global_hash<T>(&self, conn: &mut T) -> MbtResult<()>
where
    for<'e> &'e mut T: SqliteExecutor<'e>,
{
    match self.get_global_hash(&mut *conn).await? {
        Some(hash) => {
            self.set_metadata_value(conn, "global_hash", Some(hash))
                .await
        }
        // No hash could be produced for this file; leave the metadata untouched.
        None => Ok(()),
    }
}

pub async fn validate_mbtiles<T>(
&self,
integrity_check: IntegrityCheck,
conn: &mut T,
) -> MbtResult<()>
where
for<'e> &'e mut T: SqliteExecutor<'e>,
{
// SQLite Integrity check
if integrity_check != IntegrityCheck::Off {
let sql = if integrity_check == IntegrityCheck::Full {
"PRAGMA integrity_check;"
} else {
"PRAGMA quick_check;"
};

let result = query(sql)
.map(|row: SqliteRow| row.get::<String, _>(0))
.fetch_all(&mut *conn)
.await?;

if result.len() > 1
|| result.get(0).ok_or(FailedIntegrityCheck(
self.filepath().to_string(),
vec!["SQLite could not perform integrity check".to_string()],
))? != "ok"
{
return Err(FailedIntegrityCheck(self.filepath().to_string(), result));
}
}
.to_string();

let mbttype = self.detect_type(&mut *conn).await?;

if mbttype == MbtType::Flat {
println!(
"No hash column in flat table format, skipping hash-based validation steps..."
);
return Ok(());
}

let rusqlite_conn = RusqliteConnection::open(Path::new(self.filepath()))?;
register_md5_function(&rusqlite_conn)?;

if rusqlite_conn.prepare(&sql)?.exists(())? {
// Global hash check
if let Some(global_hash) = self.get_metadata_value(&mut *conn, "global_hash").await? {
if let Some(new_global_hash) = self.get_global_hash(&mut *conn).await? {
if global_hash != new_global_hash {
return Err(InvalidTileData(self.filepath().to_string()));
}
}
} else {
return Err(GlobalHashValueNotFound(self.filepath().to_string()));
}

// Per-tile hash check
let sql = if mbttype == MbtType::FlatWithHash {
"SELECT 1 FROM tiles_with_hash WHERE tile_hash != hex(md5(tile_data)) LIMIT 1;"
} else {
"SELECT 1 FROM images WHERE tile_id != hex(md5(tile_data)) LIMIT 1;"
};

if rusqlite_conn.prepare(sql)?.exists(())? {
return Err(InvalidTileData(self.filepath().to_string()));
}

Expand Down Expand Up @@ -569,16 +648,24 @@ mod tests {
async fn validate_valid_file() {
let (mut conn, mbt) = open("../tests/fixtures/files/zoomed_world_cities.mbtiles").await;

mbt.validate_mbtiles(&mut conn).await.unwrap();
mbt.validate_mbtiles(IntegrityCheck::Quick, &mut conn)
.await
.unwrap();
}

#[actix_rt::test]
async fn validate_invalid_file() {
let (mut conn, mbt) =
open("../tests/fixtures/files/invalid_zoomed_world_cities.mbtiles").await;

print!(
"VLAUE {:?}",
mbt.validate_mbtiles(IntegrityCheck::Quick, &mut conn).await
);
assert!(matches!(
mbt.validate_mbtiles(&mut conn).await.unwrap_err(),
mbt.validate_mbtiles(IntegrityCheck::Quick, &mut conn)
.await
.unwrap_err(),
MbtError::InvalidTileData(..)
));
}
Expand Down
19 changes: 17 additions & 2 deletions martin-mbtiles/src/tile_copier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ pub struct TileCopierOptions {
src_file: PathBuf,
/// MBTiles file to write to
dst_file: PathBuf,
/// TODO: add documentation Output format of the destination file, ignored if the file exists. if not specified, defaults to the type of source
/// Output format of the destination file, ignored if the file exists. If not specified, defaults to the type of source
#[cfg_attr(feature = "cli", arg(long, value_enum))]
dst_mbttype: Option<MbtType>,
/// Specify copying behaviour when tiles with duplicate (zoom_level, tile_column, tile_row) values are found
#[cfg_attr(feature = "cli", arg(long, value_enum, default_value_t = CopyDuplicateMode::Override))]
#[cfg_attr(feature = "cli", arg(long, value_enum, default_value_t = CopyDuplicateMode::default()))]
on_duplicate: CopyDuplicateMode,
/// Minimum zoom level to copy
#[cfg_attr(feature = "cli", arg(long, conflicts_with("zoom_levels")))]
Expand All @@ -49,6 +49,9 @@ pub struct TileCopierOptions {
/// Compare source file with this file, and only copy non-identical tiles to destination
#[cfg_attr(feature = "cli", arg(long))]
diff_with_file: Option<PathBuf>,
/// Skip generating a global hash for mbtiles validation. By default, if dst_mbttype is flat-with-hash or normalized, generate a global hash and store in the metadata table
#[cfg_attr(feature = "cli", arg(long))]
skip_global_hash: bool,
}

#[cfg(feature = "cli")]
Expand Down Expand Up @@ -101,6 +104,7 @@ impl TileCopierOptions {
min_zoom: None,
max_zoom: None,
diff_with_file: None,
skip_global_hash: false,
}
}

Expand Down Expand Up @@ -133,6 +137,11 @@ impl TileCopierOptions {
self.diff_with_file = Some(diff_with_file);
self
}

pub fn skip_global_hash(mut self, skip_global_hash: bool) -> Self {
self.skip_global_hash = skip_global_hash;
self
}
}

impl TileCopier {
Expand Down Expand Up @@ -226,6 +235,12 @@ impl TileCopier {
}
};

if !self.options.skip_global_hash
&& (dst_mbttype == FlatWithHash || dst_mbttype == Normalized)
{
self.dst_mbtiles.generate_global_hash(&mut conn).await?;
}

Ok(conn)
}

Expand Down
Binary file modified tests/fixtures/files/invalid_zoomed_world_cities.mbtiles
Binary file not shown.
Binary file modified tests/fixtures/files/zoomed_world_cities.mbtiles
Binary file not shown.

0 comments on commit 875a942

Please sign in to comment.