Skip to content

Commit

Permalink
adding dupe detector
Browse files Browse the repository at this point in the history
  • Loading branch information
noahgift committed Jul 21, 2023
1 parent e778605 commit 32d3fe4
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 0 deletions.
11 changes: 11 additions & 0 deletions sha3-dupe-detector/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "sha3-dupe-detector"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
sha3 = "0.10.8"
rand = "0.8.4"
hex = "0.4.3"
13 changes: 13 additions & 0 deletions sha3-dupe-detector/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
format:
cargo fmt --quiet

lint:
cargo clippy --quiet

test:
cargo test --quiet

run:
cargo run

all: format lint test run
67 changes: 67 additions & 0 deletions sha3-dupe-detector/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use rand::prelude::SliceRandom;
use rand::thread_rng;
use rand::Rng;
use sha3::Digest;
use sha3::Sha3_256;
use std::collections::HashMap;

/// Fixed pool of ten source phrases that `generate_random_phrases` draws from.
///
/// Note that the first and third entries differ only by a leading "a ", so they
/// are distinct strings and are counted as separate phrases.
static PHRASES: [&str; 10] = [
"man can be destroyed but not defeated",
"but man is not made for defeat",
"a man can be destroyed but not defeated",
"the old man was thin and gaunt",
"everything about him was old",
"the sail was patched with flour sacks",
"he was an old man who fished alone",
"the old man had taught the boy to fish",
"the old man looked at him with his sun burned confident loving eyes",
"his eyes were cheerful and undefeated",
];

// Generate random phrases
pub fn generate_random_phrases() -> Vec<&'static str> {
let mut rng = thread_rng();
let mut phrases = Vec::new();

for &phrase in PHRASES.iter() {
let copies = rng.gen_range(1..=3);

for _ in 0..copies {
phrases.push(phrase);
}
}

phrases.shuffle(&mut rng);

phrases
}

/// Counts how often each phrase occurs, keyed by its SHA3-256 digest, and
/// prints a duplicate report to standard output.
///
/// The report consists of:
/// * the total number of input phrases,
/// * one line per phrase that occurs more than once (hex digest, count, text),
///   in the map's iteration order,
/// * the number of distinct phrases, the number of distinct phrases that have
///   duplicates, and the number of surplus copies (occurrences beyond the
///   first of each duplicated phrase).
pub fn analyze_duplicates(phrases: &[&str]) {
    let mut tally: HashMap<_, (usize, &str)> = HashMap::new();
    println!("Total number of phrases: {}", phrases.len());

    // Key on the digest; remember the first text seen for that digest.
    for &text in phrases {
        let digest = Sha3_256::digest(text.as_bytes());
        tally.entry(digest).or_insert((0, text)).0 += 1;
    }

    let mut duplicated_phrases = 0;
    let mut surplus_copies = 0;

    for (digest, (seen, text)) in tally.iter().filter(|(_, entry)| entry.0 > 1) {
        duplicated_phrases += 1;
        // The first occurrence is the original, so it is not counted as a copy.
        surplus_copies += seen - 1;
        println!("{} - {} times: {}", hex::encode(digest), seen, text);
    }

    println!("Total Unique Phrases: {}", tally.len());
    println!("Total Unique Duplicates: {}", duplicated_phrases);
    println!("Total Combined Duplicates: {}", surplus_copies);
}
27 changes: 27 additions & 0 deletions sha3-dupe-detector/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
Builds a shuffled list in which each phrase from a fixed set appears one to three times,
then prints every phrase that occurs more than once, followed by counts of unique and duplicate phrases.
Example output:
Total number of phrases: 24
131a931202f9f1e7821ece767d0a9041aeb0270a40def3583de149c849683cb2 - 3 times: the old man looked at him with his sun burned confident loving eyes
ad604cf092d30c844c8f1820de47771efef7a66763468cd2a68bbed8637579d2 - 3 times: his eyes were cheerful and undefeated
796599a1f14554fde9514bf41dca747548570d3a7a38a74cc19caa07ae55ca70 - 2 times: a man can be destroyed but not defeated
6931bbb80ba33090d9e7015551e7e21676a4813d50238b1e75c9a9ce38845b1e - 2 times: but man is not made for defeat
864fd2e3cac4fa99278594867c3accc16aacfb3f543e0ed53bf71cd8d23273e1 - 3 times: man can be destroyed but not defeated
0a9247da949c7ff1ff5b64bf3482a46cb90b87de9cade88ca26ff4bd101b1017 - 3 times: everything about him was old
7f7f417bb4ff8b62b19edc25c3e359c6a32f7d2f06883ce40062eae093a23ad1 - 3 times: the old man had taught the boy to fish
c33fc7b48db132dfdc5fc6aac5514ad867fc35b21c06e55725720057034c6a56 - 2 times: he was an old man who fished alone
d40a2c3a380fbb5f5b15db16a9acb87b14c91906aac595e6f920827ee6187ef6 - 2 times: the sail was patched with flour sacks
Total Unique Phrases: 10
Total Unique Duplicates: 9
Total Combined Duplicates: 14
*/
use sha3_dupe_detector::generate_random_phrases;

/// Entry point: generates a randomized phrase list and prints its duplicate report.
fn main() {
    sha3_dupe_detector::analyze_duplicates(&generate_random_phrases());
}

0 comments on commit 32d3fe4

Please sign in to comment.