Skip to content
This repository
Newer
Older
100644 213 lines (165 sloc) 5.794 kb
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
1 // xfail-pretty
2
3 // multi tasking k-nucleotide
4
e653d493 »
2012-09-18 rustc: Remove legacy mode inference, unless #[legacy_modes] is used
5 #[legacy_modes];
6
ea01ee2e »
2012-09-11 Convert 'use' to 'extern mod'. Remove old 'use' syntax
7 extern mod std;
f686896f »
2012-09-05 test: "import" -> "use"
8 use std::map;
cb7a5395 »
2012-09-10 Convert std::map to camel case
9 use std::map::HashMap;
f686896f »
2012-09-05 test: "import" -> "use"
10 use std::sort;
11 use io::ReaderUtil;
12 use pipes::{stream, Port, Chan};
10aa849d »
2012-09-10 Fix broken tests
13 use cmp::Ord;
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
14
// sort_and_fmt: turns a frequency map into text, one entry per line.
//   mm    - map from byte sequences to occurrence counts
//   total - denominator used to convert each count into a percentage
// Returns a single string holding one line per entry: the key upper-cased,
// a space, and the percentage formatted with %0.3f.
// Entries come out in descending percentage order. Entries with equal
// percentages are expected to remain in ascending key order; that relies on
// sort::merge_sort being stable -- TODO confirm.
15 // given a map, print a sorted version of it
cb7a5395 »
2012-09-10 Convert std::map to camel case
16 fn sort_and_fmt(mm: HashMap<~[u8], uint>, total: uint) -> ~str {
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
// xx expressed as a percentage of yy
17 fn pct(xx: uint, yy: uint) -> float {
b355936b »
2012-08-01 Convert ret to return
18 return (xx as float) * 100f / (yy as float);
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
19 }
20
10aa849d »
2012-09-10 Fix broken tests
// NOTE: despite the le_ prefix this compares the second tuple elements with >=,
// so using it as the ordering predicate yields descending values
21 pure fn le_by_val<TT: Copy, UU: Copy Ord>(kv0: &(TT,UU),
97452c0c »
2012-08-02 Remove modes from map API and replace with regions.
22 kv1: &(TT,UU)) -> bool {
23 let (_, v0) = *kv0;
24 let (_, v1) = *kv1;
b355936b »
2012-08-01 Convert ret to return
25 return v0 >= v1;
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
26 }
27
10aa849d »
2012-09-10 Fix broken tests
// ascending (<=) comparison on the first tuple elements
28 pure fn le_by_key<TT: Copy Ord, UU: Copy>(kv0: &(TT,UU),
97452c0c »
2012-08-02 Remove modes from map API and replace with regions.
29 kv1: &(TT,UU)) -> bool {
30 let (k0, _) = *kv0;
31 let (k1, _) = *kv1;
b355936b »
2012-08-01 Convert ret to return
32 return k0 <= k1;
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
33 }
34
// the inner merge_sort orders by key, the outer one then reorders by value
35 // sort by key, then by value
10aa849d »
2012-09-10 Fix broken tests
36 fn sortKV<TT: Copy Ord, UU: Copy Ord>(orig: ~[(TT,UU)]) -> ~[(TT,UU)] {
b355936b »
2012-08-01 Convert ret to return
37 return sort::merge_sort(le_by_val, sort::merge_sort(le_by_key, orig));
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
38 }
39
40 let mut pairs = ~[];
41
// collect (key, percentage) tuples; returning true keeps the iteration going
42 // map -> [(k,%)]
43 mm.each(fn&(key: ~[u8], val: uint) -> bool {
44 vec::push(pairs, (key, pct(val, total)));
b355936b »
2012-08-01 Convert ret to return
45 return true;
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
46 });
47
48 let pairs_sorted = sortKV(pairs);
49
92743dc2 »
2012-07-13 Move the world over to using the new style string literals and types.…
50 let mut buffer = ~"";
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
51
// append one formatted line per sorted pair; the bytes-to-string conversion
// comes from str::raw, presumably the reason the closure is marked unsafe
52 pairs_sorted.each(fn&(kv: (~[u8], float)) -> bool unsafe {
53 let (k,v) = kv;
308ca060 »
2012-09-12 Rename str::unsafe to str::raw
54 buffer += (fmt!("%s %0.3f\n", str::to_upper(str::raw::from_bytes(k)), v));
b355936b »
2012-08-01 Convert ret to return
55 return true;
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
56 });
57
b355936b »
2012-08-01 Convert ret to return
58 return buffer;
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
59 }
60
61 // given a map, search for the frequency of a pattern
cb7a5395 »
2012-09-10 Convert std::map to camel case
62 fn find(mm: HashMap<~[u8], uint>, key: ~str) -> uint {
9f591319 »
2012-08-23 Rename str::bytes to str::to_bytes
63 match mm.find(str::to_bytes(str::to_lower(key))) {
8337fa1a »
2012-08-20 Camel case the option type
64 option::None => { return 0u; }
65 option::Some(num) => { return num; }
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
66 }
67 }
68
69 // given a map, increment the counter for a key
cb7a5395 »
2012-09-10 Convert std::map to camel case
70 fn update_freq(mm: HashMap<~[u8], uint>, key: &[u8]) {
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
71 let key = vec::slice(key, 0, key.len());
ecaf9e39 »
2012-08-06 Convert alt to match. Stop parsing alt
72 match mm.find(key) {
8337fa1a »
2012-08-20 Camel case the option type
73 option::None => { mm.insert(key, 1u ); }
74 option::Some(val) => { mm.insert(key, 1u + val); }
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
75 }
76 }
77
78 // given a ~[u8], for each window call a function
79 // i.e., for "hello" and windows of size four,
80 // run it("hell") and it("ello"), then return "llo"
c918bd01 »
2012-07-26 Fix borrow check errors in k-nucleotide.
81 fn windows_with_carry(bb: &[u8], nn: uint,
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
82 it: fn(window: &[u8])) -> ~[u8] {
83 let mut ii = 0u;
84
85 let len = vec::len(bb);
86 while ii < len - (nn - 1u) {
65beca4e »
2012-07-26 Use iteration protocol for ebml, use vec::view in more places (issue #…
87 it(vec::view(bb, ii, ii+nn));
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
88 ii += 1u;
89 }
90
b355936b »
2012-08-01 Convert ret to return
91 return vec::slice(bb, len - (nn - 1u), len);
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
92 }
93
cfbc7cbd »
2012-08-28 Convert core::pipes to camel case
// make_sequence_processor: body of one worker.
//   sz          - window size this worker counts
//   from_parent - port delivering chunks of sequence bytes; an empty vector
//                 ends the input
//   to_parent   - channel on which the single result string is sent
// Counts every window of sz bytes across all received chunks, then sends
// back either the whole frequency table or the count of one fixed pattern,
// depending on sz.
94 fn make_sequence_processor(sz: uint, from_parent: pipes::Port<~[u8]>,
95 to_parent: pipes::Chan<~str>) {
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
96
cb7a5395 »
2012-09-10 Convert std::map to camel case
97 let freqs: HashMap<~[u8], uint> = map::bytes_hash();
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
// bytes left over from the previous chunk that did not yet fill a window
98 let mut carry: ~[u8] = ~[];
// number of windows counted so far
99 let mut total: uint = 0u;
100
101 let mut line: ~[u8];
102
103 loop {
104
105 line = from_parent.recv();
// an empty vector is the end-of-input marker
106 if line == ~[] { break; }
107
// prepend the previous carry so windows that span two chunks are counted;
// the value returned becomes the carry for the next chunk
108 carry = windows_with_carry(carry + line, sz, |window| {
109 update_freq(freqs, window);
110 total += 1u;
111 });
112 }
113
ecaf9e39 »
2012-08-06 Convert alt to match. Stop parsing alt
// sizes 1 and 2 report the full table; the other listed sizes report the
// count of one fixed pattern of that length; any other size yields an
// empty string
114 let buffer = match sz {
025d8662 »
2012-08-03 Switch alts to use arrows
115 1u => { sort_and_fmt(freqs, total) }
116 2u => { sort_and_fmt(freqs, total) }
29f32b4a »
2012-08-22 `m1!{...}` -> `m1!(...)`
117 3u => { fmt!("%u\t%s", find(freqs, ~"GGT"), ~"GGT") }
118 4u => { fmt!("%u\t%s", find(freqs, ~"GGTA"), ~"GGTA") }
119 6u => { fmt!("%u\t%s", find(freqs, ~"GGTATT"), ~"GGTATT") }
120 12u => { fmt!("%u\t%s", find(freqs, ~"GGTATTTTAATT"), ~"GGTATTTTAATT") }
121 18u => { fmt!("%u\t%s", find(freqs, ~"GGTATTTTAATTTATAGT"), ~"GGTATTTTAATTTATAGT") }
025d8662 »
2012-08-03 Switch alts to use arrows
122 _ => { ~"" }
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
123 };
124
29f32b4a »
2012-08-22 `m1!{...}` -> `m1!(...)`
125 //comm::send(to_parent, fmt!("yay{%u}", sz));
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
126 to_parent.send(buffer);
127 }
128
// main: drives the benchmark.
// Input comes from the bundled data file when the RUST_BENCH environment
// variable is set, otherwise from stdin. One worker task is spawned per
// window size. Lines following the first header line (one starting with '>')
// that contains THREE are sent to every worker until the next header line
// or end of input. An empty vector then tells each worker to finish, and
// the workers' results are printed in the order of the size list.
// The args parameter is not used in this function.
129 // given a FASTA file on stdin, process sequence THREE
92743dc2 »
2012-07-13 Move the world over to using the new style string literals and types.…
130 fn main(args: ~[~str]) {
131 let rdr = if os::getenv(~"RUST_BENCH").is_some() {
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
132 // FIXME: Using this compile-time env variable is a crummy way to
133 // get to this massive data set, but #include_bin chokes on it (#2598)
c284b8b1 »
2012-08-24 Start using core::path2::Path in a lot of places.
134 let path = Path(env!("CFG_SRC_DIR"))
135 .push_rel(&Path("src/test/bench/shootout-k-nucleotide.data"));
136 result::get(io::file_reader(&path))
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
137 } else {
138 io::stdin()
139 };
140
141
142
143 // initialize each sequence sorter
144 let sizes = ~[1u,2u,3u,4u,6u,12u,18u];
8337fa1a »
2012-08-20 Camel case the option type
// one result stream per size, wrapped in Some so that each pair can be
// swapped out of the vector inside the closure below
145 let streams = vec::map(sizes, |_sz| Some(stream()));
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
146 let streams = vec::to_mut(streams);
// receiving ends for worker results, in the same order as sizes
147 let mut from_child = ~[];
// sending ends for feeding each worker, in the same order as sizes
148 let to_child = vec::mapi(sizes, |ii, sz| {
8337fa1a »
2012-08-20 Camel case the option type
149 let mut stream = None;
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
// swap the stream out of its slot, leaving None behind
150 stream <-> streams[ii];
151 let (to_parent_, from_child_) = option::unwrap(stream);
152
153 vec::push(from_child, from_child_);
154
22e955a7 »
2012-07-10 Move streams into core.
// second stream, parent to worker, carrying the sequence bytes
155 let (to_child, from_parent) = pipes::stream();
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
156
157 do task::spawn_with(from_parent) |from_parent| {
158 make_sequence_processor(sz, from_parent, to_parent_);
159 };
160
161 to_child
162 });
163
164
165 // latch stores true after we've started
166 // reading the sequence of interest
167 let mut proc_mode = false;
168
169 while !rdr.eof() {
92743dc2 »
2012-07-13 Move the world over to using the new style string literals and types.…
170 let line: ~str = rdr.read_line();
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
171
2810ea9a »
2012-09-07 Convert 'again' to 'loop'. Remove 'again' keyword
// skip empty lines; this also guards the line[0] access below
172 if str::len(line) == 0u { loop; }
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
173
ecaf9e39 »
2012-08-06 Convert alt to match. Stop parsing alt
// dispatch on the first byte of the line and on whether the wanted
// section has already started
174 match (line[0], proc_mode) {
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
175
176 // start processing if this is the one
025d8662 »
2012-08-03 Switch alts to use arrows
177 ('>' as u8, false) => {
ecaf9e39 »
2012-08-06 Convert alt to match. Stop parsing alt
178 match str::find_str_from(line, ~"THREE", 1u) {
8337fa1a »
2012-08-20 Camel case the option type
179 option::Some(_) => { proc_mode = true; }
180 option::None => { }
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
181 }
182 }
183
184 // break our processing
025d8662 »
2012-08-03 Switch alts to use arrows
185 ('>' as u8, true) => { break; }
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
186
187 // process the sequence for k-mers
025d8662 »
2012-08-03 Switch alts to use arrows
188 (_, true) => {
9f591319 »
2012-08-23 Rename str::bytes to str::to_bytes
189 let line_bytes = str::to_bytes(line);
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
190
191 for sizes.eachi |ii, _sz| {
// presumably an implicit copy, so every worker receives its own vector
// -- TODO confirm against the legacy-modes semantics
192 let mut lb = line_bytes;
193 to_child[ii].send(lb);
194 }
195 }
196
197 // any non-header line seen before the wanted section has started; skip it
025d8662 »
2012-08-03 Switch alts to use arrows
198 _ => { }
b925648a »
2012-07-06 Added a k-nucleotide version that uses pipes. 31% speedup.
199 }
200 }
201
// an empty vector is the end-of-input marker each worker waits for
202 // finish...
203 for sizes.eachi |ii, _sz| {
204 to_child[ii].send(~[]);
205 }
206
207 // now fetch and print result messages
208 for sizes.eachi |ii, _sz| {
209 io::println(from_child[ii].recv());
210 }
211 }
212
Something went wrong with that request. Please try again.