/
minhash.rs
111 lines (95 loc) · 2.96 KB
/
minhash.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#[macro_use]
extern crate criterion;
use std::fs::File;
use std::io::BufReader;
use std::path::PathBuf;
use sourmash::signature::{Signature, SigsTrait};
use sourmash::sketch::minhash::{KmerMinHash, KmerMinHashBTree};
use sourmash::sketch::Sketch;
use criterion::Criterion;
fn intersection(c: &mut Criterion) {
let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
filename.push("../../tests/test-data/gather-abund/genome-s10.fa.gz.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let mut sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let mh = if let Sketch::MinHash(mh) = &sigs.swap_remove(0).sketches()[0] {
mh.clone()
} else {
unimplemented!()
};
let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
filename.push("../../tests/test-data/gather-abund/genome-s11.fa.gz.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let mut sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let mh2 = if let Sketch::MinHash(mh) = &sigs.swap_remove(0).sketches()[0] {
mh.clone()
} else {
unimplemented!()
};
let mut group = c.benchmark_group("minhash");
group.sample_size(10);
group.bench_function("intersection", |b| {
b.iter(|| {
mh.intersection(&mh2).unwrap();
});
});
group.bench_function("intersection_size", |b| {
b.iter(|| {
mh.intersection_size(&mh2).unwrap();
});
});
let mut mh1 = KmerMinHash::builder()
.num(0)
.max_hash(1_000_000)
.ksize(21)
.build();
let mut mh2 = KmerMinHash::builder()
.num(0)
.max_hash(1_000_000)
.ksize(21)
.build();
let mut mh1_btree = KmerMinHashBTree::builder()
.num(0)
.max_hash(1_000_000)
.ksize(21)
.build();
let mut mh2_btree = KmerMinHashBTree::builder()
.num(0)
.max_hash(1_000_000)
.ksize(21)
.build();
for i in 0..=1_000_000 {
if i % 2 == 0 {
mh1.add_hash(i);
mh1_btree.add_hash(i);
}
if i % 45 == 0 {
mh2.add_hash(i);
mh2_btree.add_hash(i);
}
}
group.bench_function("large intersection", |b| {
b.iter(|| {
mh1.intersection(&mh2).unwrap();
});
});
group.bench_function("large intersection_size", |b| {
b.iter(|| {
mh1.intersection_size(&mh2).unwrap();
});
});
group.bench_function("large intersection btree", |b| {
b.iter(|| {
mh1_btree.intersection(&mh2_btree).unwrap();
});
});
group.bench_function("large intersection_size btree", |b| {
b.iter(|| {
mh1_btree.intersection_size(&mh2_btree).unwrap();
});
});
}
// Register the `intersection` benchmark function under a Criterion group named
// `minhash`, then hand that group to `criterion_main!`, which supplies the
// entry point for this benchmark target.
criterion_group!(minhash, intersection);
criterion_main!(minhash);