Skip to content

Commit

Permalink
optimization: Use fxhash again but seed with random data
Browse files Browse the repository at this point in the history
  • Loading branch information
koraa committed Jan 19, 2020
1 parent 8eaafd7 commit c2658a9
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 132 deletions.
142 changes: 14 additions & 128 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@ keywords = ["cli", "uniq"]
[dependencies]
clap = "^2.33.0"
anyhow = "^1.0.26"
ahash = "^0.2.18"
fxhash = "^0.2.1"
sysconf = "^0.3.4"
getrandom = "0.1.14"
52 changes: 49 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use std::collections::{HashSet, HashMap, hash_map};
use std::hash::{Hasher, BuildHasher};
use std::hash::{Hash, Hasher, BuildHasher};
use std::io::{stdin, Read, BufRead, BufReader, stdout, Write, BufWriter, ErrorKind};
use std::{slice, default::Default, marker::PhantomData};
use sysconf::page::pagesize;
use anyhow::Result;
use clap::{Arg, App};
use ahash::ABuildHasher;
use fxhash::FxHasher64;
use getrandom::getrandom;


/// A no-operation hasher. Used as part of the uniq implementation,
Expand Down Expand Up @@ -40,6 +41,51 @@ impl<H: Hasher + Default> BuildHasher for BuildDefaultHasher<H> {
}
}

/// A `RandomState`-like hasher builder that works with arbitrary hash functions.
///
/// The builder stores a 64-bit seed (drawn from the system's
/// cryptographically secure RNG when the builder is created). Every
/// hasher handed out is a default-constructed `H` that has already been
/// fed that seed as its first input, so two builders with different seeds
/// yield different hash values for the same data.
///
/// The `Hasher + Default` requirements on `H` live on the `impl` blocks
/// that actually need them rather than on the type itself.
struct BuildRandomStateHasher<H> {
    /// Seed value hashed first by every hasher this builder creates.
    seed: u64,
    /// Ties the builder to the hasher type `H` without storing an `H`.
    _phantom: PhantomData<H>,
}

impl<H: Hasher + Default> BuildRandomStateHasher<H> {
    /// Creates a builder whose seed is eight bytes obtained from `getrandom()`.
    ///
    /// A failing `getrandom()` call is retried. Once the initial attempt and
    /// ten retries have all failed, the last error is reported via a panic.
    ///
    /// # Panics
    ///
    /// Panics if `getrandom()` fails eleven times in a row.
    fn new() -> Self {
        // Number of retries allowed after the first failed attempt.
        const MAX_RETRIES: u32 = 10;

        let mut buf = [0u8; 8];
        let mut retries = 0;
        while let Err(err) = getrandom(&mut buf) {
            if retries >= MAX_RETRIES {
                panic!("getrandom() failed after {} retries: {:?}", MAX_RETRIES, err);
            }
            retries += 1;
        }

        BuildRandomStateHasher {
            // Byte order is irrelevant here: the bytes are random anyway.
            seed: u64::from_ne_bytes(buf),
            _phantom: PhantomData,
        }
    }
}

impl<H: Hasher + Default> BuildHasher for BuildRandomStateHasher<H> {
    type Hasher = H;

    /// Returns a default-constructed `H` that has already absorbed this
    /// builder's seed, so everything hashed afterwards is mixed with it.
    fn build_hasher(&self) -> Self::Hasher {
        let mut state: H = Default::default();
        // Feed the seed first; callers then hash their own data on top of it.
        Hash::hash(&self.seed, &mut state);
        state
    }
}

/// Hash the given value with the given BuildHasher. Now.
fn hash<T: BuildHasher, U: std::hash::Hash + ?Sized>(build: &T, v: &U) -> u64 {
let mut s = build.build_hasher();
Expand Down Expand Up @@ -155,7 +201,7 @@ fn uniq_cmd(delim: u8) -> Result<()> {
// Line processing/output ///////////////////////
let out = stdout();
let inp = stdin();
let hasher = ABuildHasher::new();
let hasher = BuildRandomStateHasher::<FxHasher64>::new();
let mut out = BufWriter::new(out.lock());
let mut set = HashSet::<u64, BuildDefaultHasher<IdentityHasher>>::default();

Expand Down

0 comments on commit c2658a9

Please sign in to comment.