/
finch.rs
88 lines (67 loc) · 2.47 KB
/
finch.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#[cfg(feature = "from-finch")]
extern crate finch;
#[cfg(feature = "from-finch")]
extern crate needletail;
use std::collections::HashMap;
use std::collections::HashSet;
use std::iter::FromIterator;
use sourmash::KmerMinHash;
#[cfg(feature = "from-finch")]
use finch::minhashes::MinHashKmers;
#[cfg(feature = "from-finch")]
use needletail::kmer::canonical;
/// Compares a sourmash `KmerMinHash` against a finch `MinHashKmers` built from
/// the same sequence.
///
/// `a` receives the sequence through `add_sequence`; `b` receives the result of
/// `canonical(kmer)` for every 10-byte window of that sequence. The test then
/// checks that both sketches hold exactly the same set of hashes and, when `a`
/// exposes abundances, that each finch entry's `count + extra_count` equals the
/// abundance `a` stores for the same hash.
#[cfg(feature = "from-finch")]
#[test]
fn finch_behavior() {
    let mut a = KmerMinHash::new(20, 10, false, 42, 0, true);
    let mut b = MinHashKmers::new(20, 42);

    let seq = b"TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA";

    a.add_sequence(seq, false);

    // The window width (10) matches the second argument given to
    // `KmerMinHash::new` above.
    for kmer in seq.windows(10) {
        b.push(&canonical(kmer), 0);
    }

    let b_hashes = b.into_vec();

    let s1: HashSet<u64> = a.mins.iter().cloned().collect();
    let s2: HashSet<u64> = b_hashes.iter().map(|x| x.hash as u64).collect();
    let i1 = &s1 & &s2;

    // If the intersection is as large as each side, both sides hold the same hashes.
    assert_eq!(
        i1.len(),
        a.mins.len(),
        "sourmash sketch contains hashes missing from the finch sketch"
    );
    assert_eq!(
        i1.len(),
        b_hashes.len(),
        "finch sketch contains hashes missing from the sourmash sketch"
    );

    // NOTE(review): when `a.abunds` is `None` the abundance comparison is
    // skipped silently; confirm whether that should be a test failure instead.
    if let Some(abunds) = a.abunds.as_ref() {
        let smap: HashMap<u64, u64> = a
            .mins
            .iter()
            .cloned()
            .zip(abunds.iter().cloned())
            .collect();

        for item in b_hashes.iter() {
            let hash = item.hash as u64;
            // Widen before adding so the sum cannot overflow the narrower field type.
            let expected = item.count as u64 + item.extra_count as u64;
            assert_eq!(
                smap.get(&hash),
                Some(&expected),
                "abundance mismatch for hash {}",
                hash
            );
        }
    }
}
/// Checks that converting a finch `MinHashKmers` with `KmerMinHash::from`
/// yields the same sketch as feeding the sequence to a `KmerMinHash` directly.
///
/// `a` receives the sequence through `add_sequence`; `b` receives the result of
/// `canonical(kmer)` for every 10-byte window and is then converted into `c`.
/// The test checks that `a` and `c` hold exactly the same set of hashes and,
/// when both expose abundances, that every hash in `a` has the same abundance
/// in `c`.
#[cfg(feature = "from-finch")]
#[test]
fn from_finch() {
    let mut a = KmerMinHash::new(20, 10, false, 42, 0, true);
    let mut b = MinHashKmers::new(20, 42);

    let seq = b"TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA";

    a.add_sequence(seq, false);

    // The window width (10) matches the second argument given to
    // `KmerMinHash::new` above.
    for kmer in seq.windows(10) {
        b.push(&canonical(kmer), 0);
    }

    let c = KmerMinHash::from(b);

    let s1: HashSet<u64> = a.mins.iter().cloned().collect();
    let s2: HashSet<u64> = c.mins.iter().cloned().collect();
    let i1 = &s1 & &s2;

    // If the intersection is as large as each side, both sides hold the same hashes.
    assert_eq!(
        i1.len(),
        a.mins.len(),
        "direct sketch contains hashes missing from the converted sketch"
    );
    assert_eq!(
        i1.len(),
        c.mins.len(),
        "converted sketch contains hashes missing from the direct sketch"
    );

    // NOTE(review): when either sketch has no abundances the comparison is
    // skipped silently; confirm whether that should be a test failure instead.
    if let (Some(a_abunds), Some(c_abunds)) = (a.abunds.as_ref(), c.abunds.as_ref()) {
        let a_smap: HashMap<u64, u64> = a
            .mins
            .iter()
            .cloned()
            .zip(a_abunds.iter().cloned())
            .collect();
        let c_smap: HashMap<u64, u64> = c
            .mins
            .iter()
            .cloned()
            .zip(c_abunds.iter().cloned())
            .collect();

        for (hash, abund) in a_smap.iter() {
            assert_eq!(
                c_smap.get(hash),
                Some(abund),
                "abundance mismatch for hash {}",
                hash
            );
        }
    }
}