Skip to content

Commit

Permalink
Fix snapshot tar symlinks validation (#4033)
Browse files Browse the repository at this point in the history
* add validation for snapshot archives for not containing any symlinks

* fix for test

* fmt

* Only allow regular file and directory entry types in snapshot archives

---------

Co-authored-by: timvisee <tim@visee.me>
  • Loading branch information
generall and timvisee committed Apr 15, 2024
1 parent 03166ce commit ee7a31e
Show file tree
Hide file tree
Showing 9 changed files with 93 additions and 29 deletions.
4 changes: 2 additions & 2 deletions lib/collection/src/collection/snapshots.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use tokio::fs;
use super::Collection;
use crate::collection::CollectionVersion;
use crate::common::snapshots_manager::SnapshotStorageManager;
use crate::common::validate_snapshot_archive::validate_open_snapshot_archive;
use crate::config::{CollectionConfig, ShardingMethod};
use crate::operations::snapshot_ops::SnapshotDescription;
use crate::operations::types::{CollectionError, CollectionResult, NodeType};
Expand Down Expand Up @@ -187,8 +188,7 @@ impl Collection {
is_distributed: bool,
) -> CollectionResult<()> {
// decompress archive
let archive_file = std::fs::File::open(snapshot_path)?;
let mut ar = tar::Archive::new(archive_file);
let mut ar = validate_open_snapshot_archive(snapshot_path)?;
ar.unpack(target_dir)?;

let config = CollectionConfig::load(target_dir)?;
Expand Down
1 change: 1 addition & 0 deletions lib/collection/src/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ pub mod snapshots_manager;
pub mod stoppable_task;
pub mod stoppable_task_async;
pub mod stopping_guard;
pub mod validate_snapshot_archive;
13 changes: 13 additions & 0 deletions lib/collection/src/common/validate_snapshot_archive.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
use std::fs::File;
use std::path::Path;

use segment::common::validate_snapshot_archive::open_snapshot_archive_with_validation;
use tar::Archive;

use crate::operations::types::CollectionResult;

pub fn validate_open_snapshot_archive<P: AsRef<Path>>(
archive_path: P,
) -> CollectionResult<Archive<File>> {
Ok(open_snapshot_archive_with_validation(archive_path)?)
}
6 changes: 3 additions & 3 deletions lib/collection/src/shards/shard_holder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use tokio::sync::RwLock;

use super::replica_set::AbortShardTransfer;
use super::transfer::transfer_tasks_pool::TransferTasksPool;
use crate::common::validate_snapshot_archive::validate_open_snapshot_archive;
use crate::config::{CollectionConfig, ShardingMethod};
use crate::hash_ring::HashRing;
use crate::operations::shard_selector_internal::ShardSelectorInternal;
Expand Down Expand Up @@ -802,14 +803,13 @@ impl ShardHolder {
return Err(shard_not_found_error(shard_id));
}

let snapshot = std::fs::File::open(snapshot_path)?;

if !temp_dir.exists() {
std::fs::create_dir_all(temp_dir)?;
}

let snapshot_file_name = snapshot_path.file_name().unwrap().to_string_lossy();

let snapshot_path = snapshot_path.to_path_buf();
let snapshot_temp_dir = tempfile::Builder::new()
.prefix(&format!(
"{collection_name}-shard-{shard_id}-{snapshot_file_name}"
Expand All @@ -822,7 +822,7 @@ impl ShardHolder {
cancel::blocking::spawn_cancel_on_token(
cancel.child_token(),
move |cancel| -> CollectionResult<_> {
let mut tar = tar::Archive::new(snapshot);
let mut tar = validate_open_snapshot_archive(snapshot_path)?;

if cancel.is_cancelled() {
return Err(cancel::Error::Cancelled.into());
Expand Down
33 changes: 21 additions & 12 deletions lib/collection/src/tests/snapshot_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,7 @@ async fn _test_snapshot_collection(node_type: NodeType) {

let snapshots_path = Builder::new().prefix("test_snapshots").tempdir().unwrap();
let collection_dir = Builder::new().prefix("test_collection").tempdir().unwrap();
let recover_dir = Builder::new()
.prefix("test_collection_rec")
.tempdir()
.unwrap();

let collection_name = "test".to_string();
let collection_name_rec = "test_rec".to_string();
let mut shards = HashMap::new();
Expand Down Expand Up @@ -115,14 +112,26 @@ async fn _test_snapshot_collection(node_type: NodeType) {
.unwrap();

assert_eq!(snapshot_description.checksum.unwrap().len(), 64);
// Do not recover in local mode if some shards are remote
assert!(Collection::restore_snapshot(
&snapshots_path.path().join(&snapshot_description.name),
recover_dir.path(),
0,
false,
)
.is_err());

{
let recover_dir = Builder::new()
.prefix("test_collection_rec")
.tempdir()
.unwrap();
// Do not recover in local mode if some shards are remote
assert!(Collection::restore_snapshot(
&snapshots_path.path().join(&snapshot_description.name),
recover_dir.path(),
0,
false,
)
.is_err());
}

let recover_dir = Builder::new()
.prefix("test_collection_rec")
.tempdir()
.unwrap();

if let Err(err) = Collection::restore_snapshot(
&snapshots_path.path().join(snapshot_description.name),
Expand Down
1 change: 1 addition & 0 deletions lib/segment/src/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub mod rocksdb_buffered_delete_wrapper;
pub mod rocksdb_buffered_update_wrapper;
pub mod rocksdb_wrapper;
pub mod utils;
pub mod validate_snapshot_archive;
pub mod vector_utils;
pub mod version;

Expand Down
45 changes: 45 additions & 0 deletions lib/segment/src/common/validate_snapshot_archive.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
use std::fs::File;
use std::path::Path;

use tar::Archive;

use crate::common::operation_error::{OperationError, OperationResult};

pub fn open_snapshot_archive_with_validation<P: AsRef<Path>>(
snapshot_path: P,
) -> OperationResult<Archive<File>> {
let path = snapshot_path.as_ref();
{
let archive_file = File::open(path).map_err(|err| {
OperationError::service_error(format!(
"failed to open segment snapshot archive {path:?}: {err}"
))
})?;
let mut ar = Archive::new(archive_file);

for entry in ar.entries_with_seek()? {
let entry_type = entry?.header().entry_type();
if !matches!(
entry_type,
tar::EntryType::Regular | tar::EntryType::Directory,
) {
return Err(OperationError::ValidationError {
description: format!(
"Malformed snapshot, tar archive contains {entry_type:?} entry",
),
});
}
}
}

let archive_file = File::open(path).map_err(|err| {
OperationError::service_error(format!(
"failed to open segment snapshot archive {path:?}: {err}"
))
})?;

let mut ar = Archive::new(archive_file);
ar.set_overwrite(false);

Ok(ar)
}
15 changes: 5 additions & 10 deletions lib/segment/src/segment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use crate::common::operation_error::OperationError::TypeInferenceError;
use crate::common::operation_error::{
get_service_error, OperationError, OperationResult, SegmentFailedState,
};
use crate::common::validate_snapshot_archive::open_snapshot_archive_with_validation;
use crate::common::version::{StorageVersion, VERSION_FILE};
use crate::common::{check_named_vectors, check_query_vectors, check_stopped, check_vector_name};
use crate::data_types::named_vectors::NamedVectors;
Expand Down Expand Up @@ -474,20 +475,14 @@ impl Segment {
pub fn restore_snapshot(snapshot_path: &Path, segment_id: &str) -> OperationResult<()> {
let segment_path = snapshot_path.parent().unwrap().join(segment_id);

let archive_file = File::open(snapshot_path).map_err(|err| {
let mut archive = open_snapshot_archive_with_validation(snapshot_path)?;

archive.unpack(&segment_path).map_err(|err| {
OperationError::service_error(format!(
"failed to open segment snapshot archive {snapshot_path:?}: {err}"
"failed to unpack segment snapshot archive {snapshot_path:?}: {err}"
))
})?;

tar::Archive::new(archive_file)
.unpack(&segment_path)
.map_err(|err| {
OperationError::service_error(format!(
"failed to unpack segment snapshot archive {snapshot_path:?}: {err}"
))
})?;

let snapshot_path = segment_path.join(SNAPSHOT_PATH);

if snapshot_path.exists() {
Expand Down
4 changes: 2 additions & 2 deletions src/snapshots.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::fs::{self, remove_dir_all, rename};
use std::path::{Path, PathBuf};

use collection::collection::Collection;
use collection::common::validate_snapshot_archive::validate_open_snapshot_archive;
use collection::shards::shard::PeerId;
use log::info;
use storage::content_manager::alias_mapping::AliasPersistence;
Expand Down Expand Up @@ -94,8 +95,7 @@ pub fn recover_full_snapshot(
fs::create_dir_all(&snapshot_temp_path).unwrap();

// Un-tar snapshot into temporary directory
let archive_file = fs::File::open(snapshot_path).unwrap();
let mut ar = tar::Archive::new(archive_file);
let mut ar = validate_open_snapshot_archive(snapshot_path).unwrap();
ar.unpack(&snapshot_temp_path).unwrap();

// Read configuration file with snapshot-to-collection mapping
Expand Down

0 comments on commit ee7a31e

Please sign in to comment.