Skip to content

Commit

Permalink
Merge pull request #432 from streamdal/blinktag/wasm_fix_detective
Browse files Browse the repository at this point in the history
wasm: Fix plaintext detection in mixed plaintext/json data
  • Loading branch information
blinktag committed May 15, 2024
2 parents e1d1b25 + 11b34f2 commit 73af2f2
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 1 deletion.
2 changes: 1 addition & 1 deletion apps/server/assets/wasm/download-wasm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fi

# Get the version from the version file
version=$(cat version)
download_url="https://github.com/streamdal/streamdal/releases/download/libs%2Fwasm%2F${version}/release.zip"
download_url="https://github.com/streamdal/streamdal/releases/download/libs/wasm/${version}/release.zip"

# Remove previous release.zip (if exists)
rm -f release.zip
Expand Down
7 changes: 7 additions & 0 deletions libs/wasm-detective/src/detective.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,11 +458,18 @@ pub fn plaintext(request: &Request, input: &str) -> Vec<DetectiveStepResultMatch
// split on them since they might be inside an email, ipv6, macaddr, etc.
let no_split: HashMap<char, usize> = HashMap::from([('@', 0), (':', 0)]);

// Ignore these characters so that we can properly find PII inside JSON that is inside a plaintext string
let ignore_chars = HashMap::from([('"', 0), ('{', 0), ('}', 0), ('\'', 0)]);

// This is our word-part accumulator and is used to
// combine "user", "@", "somedomain.com" words into a single word
let mut accum: Vec<Word> = Vec::new();

for (word_start, word) in words {
if word.len() == 1 && ignore_chars.contains_key(&word.chars().next().unwrap()) {
continue;
}

// We've reached the end of a word, combine our accumulator into a single Word struct
if word == " " {
// Loop over accumulator and join the string value of each word
Expand Down
20 changes: 20 additions & 0 deletions libs/wasm-detective/src/matcher_pii_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -940,6 +940,26 @@ fn test_plaintext() {
// assert_eq!(&results[3].pii_type, "Billing");
}

/// Runs the plaintext PII matcher over a log-style line that embeds a JSON
/// object (carrying a `ccnum` value) inside ordinary text, and expects exactly
/// one match to be reported.
#[test]
fn test_plaintext_mixed() {
    let sample_text = "2024-04-29T15:59:41.60221515Z stdout Exporting data {\"user\": {\"ccnum\": \"4111111111111111\"}} to billing service";

    // Keep the payload in a named local so the request can simply borrow it
    // instead of relying on a borrowed temporary.
    let payload = sample_text.as_bytes().to_vec();

    let request = Request {
        match_type: DetectiveType::DETECTIVE_TYPE_PII_PLAINTEXT_ANY,
        data: &payload,
        path: "".to_string(),
        args: Vec::new(),
        negate: false,
        mode: DETECTIVE_TYPE_PII_KEYWORD_MODE_PERFORMANCE,
        data_format: PIPELINE_DATA_FORMAT_JSON,
    };

    let results = crate::detective::Detective::new()
        .matches(&request)
        .unwrap();

    assert_eq!(results.len(), 1);
}

#[bench]
fn bench_plaintext(b: &mut Bencher) {
b.iter(|| {
Expand Down
Binary file modified libs/wasm/build/detective.wasm
Binary file not shown.
Binary file modified libs/wasm/build/kv.wasm
Binary file not shown.

0 comments on commit 73af2f2

Please sign in to comment.