This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Mitigated RUSTSEC-2020-0159
jorgecarleitao committed Nov 10, 2021
1 parent add3e2a commit 16287b2
Showing 4 changed files with 105 additions and 100 deletions.
4 changes: 4 additions & 0 deletions .cargo/audit.toml
@@ -10,4 +10,8 @@ ignore = [
     # Therefore, this advisory does not affect us.
     "RUSTSEC-2020-0071",
     "RUSTSEC-2020-0159", # same as previous
+
+    # this cannot be addressed, only mitigated.
+    # See [.github/workflows/security.yml] for details on how we mitigate this.
+    "RUSTSEC-2021-0122",
 ]
24 changes: 24 additions & 0 deletions .github/workflows/security.yml
@@ -8,3 +8,27 @@ jobs:
       - uses: actions-rs/audit-check@v1
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
+
+  # mitigation for RUSTSEC-2021-0122
+  # flatbuffers' usage of `unsafe` is problematic and a risk.
+  # This job performs a round-trip over IPC (which uses flatbuffers) for some arrow types
+  # under miri, which exercises much of the `flatbuffers` usage in this crate.
+  miri-checks:
+    name: RUSTSEC-2021-0122 mitigation
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true # needed for the IPC test files, which are located in a submodule
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: nightly-2021-10-24
+          override: true
+      - uses: Swatinem/rust-cache@v1
+      - name: Install Miri
+        run: |
+          rustup component add miri
+          cargo miri setup
+      - name: Run
+        run: MIRIFLAGS="-Zmiri-disable-stacked-borrows -Zmiri-disable-isolation" cargo miri test --tests --features io_ipc,io_ipc_compression,io_json_integration io::ipc::write::file::write_100_nested
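
The test changes below pair with this job: tests that go through IPC compression cannot run under Miri, because the LZ4/ZSTD codecs are called over FFI, so each compressed variant is split into its own test and gated. A minimal sketch of that gating, relying only on the fact that `cargo miri test` compiles with `cfg(miri)` set:

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn some_compressed_round_trip() {
    // body elided in this sketch; it would exercise an LZ4/ZSTD code path
}

Under Miri the attribute expands to `#[ignore]`, while a plain `cargo test` still runs the test.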
89 changes: 76 additions & 13 deletions tests/it/io/ipc/write/file.rs
@@ -14,7 +14,7 @@ fn round_trip(batch: RecordBatch) -> Result<()> {
     // write IPC version 5
     let written_result = {
         let options = WriteOptions {
-            compression: Some(Compression::ZSTD),
+            compression: Some(Compression::LZ4),
         };
         let mut writer = FileWriter::try_new(result, batch.schema(), options)?;
         writer.write(&batch)?;
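
The change above swaps ZSTD for LZ4 as the codec exercised by this round trip. For context, here is a hedged sketch of the complete write-then-read cycle the helper performs; the reader-side API (`read_file_metadata`, `FileReader`) and the `into_inner` accessor match arrow2's public API from this period but are assumptions here, not lines from the commit:

use std::io::Cursor;

use arrow2::error::Result;
use arrow2::io::ipc::read::{read_file_metadata, FileReader};
use arrow2::io::ipc::write::{Compression, FileWriter, WriteOptions};
use arrow2::record_batch::RecordBatch;

fn round_trip_sketch(batch: RecordBatch) -> Result<()> {
    // write the batch into an in-memory IPC file, compressed with LZ4
    // (compression requires the `io_ipc_compression` feature)
    let options = WriteOptions {
        compression: Some(Compression::LZ4),
    };
    let mut writer = FileWriter::try_new(Cursor::new(vec![]), batch.schema(), options)?;
    writer.write(&batch)?;
    writer.finish()?;

    // read the file back and check that the batch survives unchanged
    let mut cursor = writer.into_inner(); // assumed accessor for the wrapped writer
    cursor.set_position(0);
    let metadata = read_file_metadata(&mut cursor)?;
    let mut reader = FileReader::new(cursor, metadata, None);
    let read_batch = reader.next().unwrap()?;
    assert_eq!(batch, read_batch);
    Ok(())
}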
@@ -85,39 +85,64 @@ fn test_file(version: &str, file_name: &str, compressed: bool) -> Result<()> {
 #[test]
 fn write_100_primitive() -> Result<()> {
     test_file("1.0.0-littleendian", "generated_primitive", false)?;
-    test_file("1.0.0-bigendian", "generated_primitive", false)?;
+    test_file("1.0.0-bigendian", "generated_primitive", false)
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_primitive() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_primitive", true)?;
+    test_file("1.0.0-bigendian", "generated_primitive", true)
+}
 
 #[test]
 fn write_100_datetime() -> Result<()> {
     test_file("1.0.0-littleendian", "generated_datetime", false)?;
-    test_file("1.0.0-bigendian", "generated_datetime", false)?;
+    test_file("1.0.0-bigendian", "generated_datetime", false)
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_datetime() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_datetime", true)?;
+    test_file("1.0.0-bigendian", "generated_datetime", true)
+}
 
 #[test]
 fn write_100_dictionary_unsigned() -> Result<()> {
     test_file("1.0.0-littleendian", "generated_dictionary_unsigned", false)?;
-    test_file("1.0.0-bigendian", "generated_dictionary_unsigned", false)?;
+    test_file("1.0.0-bigendian", "generated_dictionary_unsigned", false)
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_dictionary_unsigned() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_dictionary_unsigned", true)?;
+    test_file("1.0.0-bigendian", "generated_dictionary_unsigned", true)
+}
 
 #[test]
 fn write_100_dictionary() -> Result<()> {
     test_file("1.0.0-littleendian", "generated_dictionary", false)?;
-    test_file("1.0.0-bigendian", "generated_dictionary", false)?;
+    test_file("1.0.0-bigendian", "generated_dictionary", false)
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_dictionary() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_dictionary", true)?;
+    test_file("1.0.0-bigendian", "generated_dictionary", true)
+}
 
 #[test]
 fn write_100_interval() -> Result<()> {
     test_file("1.0.0-littleendian", "generated_interval", false)?;
-    test_file("1.0.0-bigendian", "generated_interval", false)?;
+    test_file("1.0.0-bigendian", "generated_interval", false)
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_interval() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_interval", true)?;
+    test_file("1.0.0-bigendian", "generated_interval", true)
+}
@@ -132,7 +157,12 @@ fn write_100_large_batch() -> Result<()> {
 #[test]
 fn write_100_nested() -> Result<()> {
     test_file("1.0.0-littleendian", "generated_nested", false)?;
-    test_file("1.0.0-bigendian", "generated_nested", false)?;
+    test_file("1.0.0-bigendian", "generated_nested", false)
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_nested() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_nested", true)?;
+    test_file("1.0.0-bigendian", "generated_nested", true)
+}
@@ -144,23 +174,38 @@ fn write_100_nested_large_offsets() -> Result<()> {
         "generated_nested_large_offsets",
         false,
     )?;
-    test_file("1.0.0-bigendian", "generated_nested_large_offsets", false)?;
+    test_file("1.0.0-bigendian", "generated_nested_large_offsets", false)
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_nested_large_offsets() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_nested_large_offsets", true)?;
+    test_file("1.0.0-bigendian", "generated_nested_large_offsets", true)
+}
 
 #[test]
 fn write_100_null_trivial() -> Result<()> {
     test_file("1.0.0-littleendian", "generated_null_trivial", false)?;
-    test_file("1.0.0-bigendian", "generated_null_trivial", false)?;
+    test_file("1.0.0-bigendian", "generated_null_trivial", false)
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_null_trivial() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_null_trivial", true)?;
+    test_file("1.0.0-bigendian", "generated_null_trivial", true)
+}
 
 #[test]
 fn write_100_null() -> Result<()> {
     test_file("1.0.0-littleendian", "generated_null", false)?;
-    test_file("1.0.0-bigendian", "generated_null", false)?;
+    test_file("1.0.0-bigendian", "generated_null", false)
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_null() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_null", true)?;
+    test_file("1.0.0-bigendian", "generated_null", true)
+}
@@ -176,7 +221,12 @@ fn write_100_primitive_large_offsets() -> Result<()> {
         "1.0.0-bigendian",
         "generated_primitive_large_offsets",
         false,
-    )?;
+    )
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_primitive_large_offsets() -> Result<()> {
+    test_file(
+        "1.0.0-littleendian",
+        "generated_primitive_large_offsets",
@@ -192,7 +242,12 @@ fn write_100_primitive_no_batches() -> Result<()> {
         "generated_primitive_no_batches",
         false,
     )?;
-    test_file("1.0.0-bigendian", "generated_primitive_no_batches", false)?;
+    test_file("1.0.0-bigendian", "generated_primitive_no_batches", false)
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_primitive_no_batches() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_primitive_no_batches", true)?;
+    test_file("1.0.0-bigendian", "generated_primitive_no_batches", true)
+}
@@ -204,7 +259,12 @@ fn write_100_primitive_zerolength() -> Result<()> {
         "generated_primitive_zerolength",
         false,
     )?;
-    test_file("1.0.0-bigendian", "generated_primitive_zerolength", false)?;
+    test_file("1.0.0-bigendian", "generated_primitive_zerolength", false)
 }
+
+#[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
+fn write_100_compressed_primitive_zerolength() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_primitive_zerolength", true)?;
+    test_file("1.0.0-bigendian", "generated_primitive_zerolength", true)
+}
@@ -262,6 +322,7 @@ fn write_generated_017_union() -> Result<()> {
 }
 
 #[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
 fn write_boolean() -> Result<()> {
     use std::sync::Arc;
     let array = Arc::new(BooleanArray::from([
@@ -275,6 +336,7 @@ fn write_boolean() -> Result<()> {
 }
 
 #[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
 fn write_sliced_utf8() -> Result<()> {
     use std::sync::Arc;
     let array = Arc::new(Utf8Array::<i32>::from_slice(["aa", "bb"]).slice(1, 1)) as Arc<dyn Array>;
@@ -283,6 +345,7 @@ fn write_sliced_utf8() -> Result<()> {
 }
 
 #[test]
+#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
 fn write_sliced_list() -> Result<()> {
     let data = vec![
         Some(vec![Some(1i32), Some(2), Some(3)]),
88 changes: 1 addition & 87 deletions tests/it/test_util.rs
@@ -1,89 +1,3 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Utils to make testing easier
-
-use std::{env, error::Error, path::PathBuf};
-
-/// Returns the arrow test data directory, which is by default stored
-/// in a git submodule rooted at `arrow/testing/data`.
-///
-/// The default can be overridden by the optional environment
-/// variable `ARROW_TEST_DATA`
-///
-/// panics when the directory can not be found.
-///
-/// Example:
-/// ```
-/// let testdata = arrow::util::test_util::arrow_test_data();
-/// let csvdata = format!("{}/csv/aggregate_test_100.csv", testdata);
-/// assert!(std::path::PathBuf::from(csvdata).exists());
-/// ```
 pub fn arrow_test_data() -> String {
-    match get_data_dir("ARROW_TEST_DATA", "testing/arrow-testing/data") {
-        Ok(pb) => pb.display().to_string(),
-        Err(err) => panic!("failed to get arrow data dir: {}", err),
-    }
-}
-
-/// Returns a directory path for finding test data.
-///
-/// udf_env: name of an environment variable
-///
-/// submodule_dir: fallback path (relative to CARGO_MANIFEST_DIR)
-///
-/// Returns either:
-/// The path referred to in `udf_env` if that variable is set and refers to a directory
-/// The submodule_data directory relative to CARGO_MANIFEST_PATH
-fn get_data_dir(udf_env: &str, submodule_data: &str) -> Result<PathBuf, Box<dyn Error>> {
-    // Try user defined env.
-    if let Ok(dir) = env::var(udf_env) {
-        let trimmed = dir.trim().to_string();
-        if !trimmed.is_empty() {
-            let pb = PathBuf::from(trimmed);
-            if pb.is_dir() {
-                return Ok(pb);
-            } else {
-                return Err(format!(
-                    "the data dir `{}` defined by env {} not found",
-                    pb.display().to_string(),
-                    udf_env
-                )
-                .into());
-            }
-        }
-    }
-
-    // The env is undefined or its value is trimmed to empty, let's try default dir.
-
-    // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of your package",
-    // set by `cargo run` or `cargo test`, see:
-    // https://doc.rust-lang.org/cargo/reference/environment-variables.html
-    let dir = env!("CARGO_MANIFEST_DIR");
-
-    let pb = PathBuf::from(dir).join(submodule_data);
-    if pb.is_dir() {
-        Ok(pb)
-    } else {
-        Err(format!(
-            "env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\
-            HINT: try running `git submodule update --init`",
-            udf_env,
-            pb.display().to_string(),
-        ).into())
-    }
+    "testing/arrow-testing/data".to_string()
 }
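
With `get_data_dir` removed, `arrow_test_data` unconditionally returns the submodule path, which is why the workflow above checks out submodules. A hypothetical caller, for illustration only; the `arrow-ipc-stream/integration` layout is an assumption based on the arrow-testing repository, not code from this commit:

// Hypothetical helper: builds the path of an IPC golden file from the
// hardcoded test-data directory. The directory layout is an assumption.
fn golden_ipc_file(version: &str, file_name: &str) -> String {
    let testdata = arrow_test_data(); // now always "testing/arrow-testing/data"
    format!(
        "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
        testdata, version, file_name
    )
}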
