Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 199 lines (155 sloc) 5.348 kb
748bd12 @brson bench: xfail-pretty shootout-k-nucleotide
brson authored
1 // xfail-pretty
2
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
3 // multi tasking k-nucleotide
4
5 import io::reader_util;
6
7 use std;
8 import std::map;
9 import std::map::hashmap;
10 import std::sort;
11
12 // given a map, print a sorted version of it
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
13 fn sort_and_fmt(mm: hashmap<~[u8], uint>, total: uint) -> ~str {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
14 fn pct(xx: uint, yy: uint) -> float {
15 ret (xx as float) * 100f / (yy as float);
16 }
17
18 fn le_by_val<TT: copy, UU: copy>(kv0: (TT,UU), kv1: (TT,UU)) -> bool {
19 let (_, v0) = kv0;
20 let (_, v1) = kv1;
21 ret v0 >= v1;
22 }
23
24 fn le_by_key<TT: copy, UU: copy>(kv0: (TT,UU), kv1: (TT,UU)) -> bool {
25 let (k0, _) = kv0;
26 let (k1, _) = kv1;
27 ret k0 <= k1;
28 }
29
30 // sort by key, then by value
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
31 fn sortKV<TT: copy, UU: copy>(orig: ~[(TT,UU)]) -> ~[(TT,UU)] {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
32 ret sort::merge_sort(le_by_val, sort::merge_sort(le_by_key, orig));
33 }
34
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
35 let mut pairs = ~[];
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
36
37 // map -> [(k,%)]
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
38 mm.each(fn&(key: ~[u8], val: uint) -> bool {
b19c98e @eholk Some perf fixes, although vec::slice is still too slow (Issue #2719)
eholk authored
39 vec::push(pairs, (key, pct(val, total)));
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
40 ret true;
41 });
42
43 let pairs_sorted = sortKV(pairs);
44
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
45 let mut buffer = ~"";
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
46
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
47 pairs_sorted.each(fn&(kv: (~[u8], float)) -> bool unsafe {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
48 let (k,v) = kv;
49 buffer += (#fmt["%s %0.3f\n", str::to_upper(str::unsafe::from_bytes(k)), v]);
50 ret true;
51 });
52
53 ret buffer;
54 }
55
56 // given a map, search for the frequency of a pattern
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
57 fn find(mm: hashmap<~[u8], uint>, key: ~str) -> uint {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
58 alt mm.find(str::bytes(str::to_lower(key))) {
59 option::none { ret 0u; }
60 option::some(num) { ret num; }
61 }
62 }
63
64 // given a map, increment the counter for a key
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
65 fn update_freq(mm: hashmap<~[u8], uint>, key: &[u8]) {
a082816 @eholk More perf tweaks (issue #2719)
eholk authored
66 let key = vec::slice(key, 0, key.len());
67 alt mm.find(key) {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
68 option::none { mm.insert(key, 1u ); }
69 option::some(val) { mm.insert(key, 1u + val); }
a082816 @eholk More perf tweaks (issue #2719)
eholk authored
70 }
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
71 }
72
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
73 // given a ~[u8], for each window call a function
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
74 // i.e., for "hello" and windows of size four,
75 // run it("hell") and it("ello"), then return "llo"
c918bd0 @eholk Fix borrow check errors in k-nucleotide.
eholk authored
76 fn windows_with_carry(bb: &[u8], nn: uint,
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
77 it: fn(window: &[u8])) -> ~[u8] {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
78 let mut ii = 0u;
79
80 let len = vec::len(bb);
81 while ii < len - (nn - 1u) {
65beca4 @eholk Use iteration protocol for ebml, use vec::view in more places (issue #28...
eholk authored
82 it(vec::view(bb, ii, ii+nn));
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
83 ii += 1u;
84 }
85
86 ret vec::slice(bb, len - (nn - 1u), len);
87 }
88
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
89 fn make_sequence_processor(sz: uint, from_parent: comm::port<~[u8]>,
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
90 to_parent: comm::chan<~str>) {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
91
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
92 let freqs: hashmap<~[u8], uint> = map::bytes_hash();
93 let mut carry: ~[u8] = ~[];
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
94 let mut total: uint = 0u;
95
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
96 let mut line: ~[u8];
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
97
98 loop {
99
100 line = comm::recv(from_parent);
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
101 if line == ~[] { break; }
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
102
d1fc2b5 @brson Convert to new closure syntax
brson authored
103 carry = windows_with_carry(carry + line, sz, |window| {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
104 update_freq(freqs, window);
105 total += 1u;
106 });
107 }
108
109 let buffer = alt sz {
110 1u { sort_and_fmt(freqs, total) }
111 2u { sort_and_fmt(freqs, total) }
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
112 3u { #fmt["%u\t%s", find(freqs, ~"GGT"), ~"GGT"] }
113 4u { #fmt["%u\t%s", find(freqs, ~"GGTA"), ~"GGTA"] }
114 6u { #fmt["%u\t%s", find(freqs, ~"GGTATT"), ~"GGTATT"] }
115 12u { #fmt["%u\t%s", find(freqs, ~"GGTATTTTAATT"), ~"GGTATTTTAATT"] }
116 18u { #fmt["%u\t%s", find(freqs, ~"GGTATTTTAATTTATAGT"), ~"GGTATTTTAATTTATAGT"] }
117 _ { ~"" }
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
118 };
119
120 //comm::send(to_parent, #fmt["yay{%u}", sz]);
121 comm::send(to_parent, buffer);
122 }
123
124 // given a FASTA file on stdin, process sequence THREE
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
// Program entry point.  Reads FASTA text line by line -- from the bundled
// shootout-k-nucleotide.data file when the RUST_BENCH environment variable
// is set, otherwise from stdin -- and forwards the lines of the record whose
// '>' header contains "THREE" to one worker task per entry in `sizes`.
// After the input ends (or the next header is reached) every worker is sent
// an empty vector, and the workers' reports are printed in `sizes` order.
// `args` is not used.
125 fn main(args: ~[~str]) {
126 let rdr = if os::getenv(~"RUST_BENCH").is_some() {
03e186c @brson bench: Modify shootout-k-nucleotide to not read from the filesystem
brson authored
127 // FIXME: Using this compile-time env variable is a crummy way to
13d4b61 @catamorphism Comments only: annotate FIXMEs in tests
catamorphism authored
128 // get to this massive data set, but #include_bin chokes on it (#2598)
03e186c @brson bench: Modify shootout-k-nucleotide to not read from the filesystem
brson authored
129 let path = path::connect(
130 #env("CFG_SRC_DIR"),
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
131 ~"src/test/bench/shootout-k-nucleotide.data"
03e186c @brson bench: Modify shootout-k-nucleotide to not read from the filesystem
brson authored
132 );
133 result::get(io::file_reader(path))
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
134 } else {
135 io::stdin()
136 };
137
138
139
140 // initialize each sequence sorter
// For every size: a result port (from_child), a channel onto that port
// (to_parent) that is handed to the worker, and the channel returned by
// task::spawn_listener (to_child) on which the worker receives its input.
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
141 let sizes = ~[1u,2u,3u,4u,6u,12u,18u];
d1fc2b5 @brson Convert to new closure syntax
brson authored
142 let from_child = vec::map (sizes, |_sz| comm::port() );
143 let to_parent = vec::mapi(sizes, |ii, _sz| comm::chan(from_child[ii]) );
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
144 let to_child = vec::mapi(sizes, fn@(ii: uint, sz: uint) -> comm::chan<~[u8]> {
d1fc2b5 @brson Convert to new closure syntax
brson authored
145 ret do task::spawn_listener |from_parent| {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
146 make_sequence_processor(sz, from_parent, to_parent[ii]);
147 };
148 });
149
150
151 // latch stores true after we've started
152 // reading the sequence of interest
153 let mut proc_mode = false;
154
155 while !rdr.eof() {
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
156 let line: ~str = rdr.read_line();
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
157
// skip blank lines; this also keeps line[0] below from indexing an empty string
c26d025 @graydon Switch 'cont' to 'again' everywhere. Close #2229.
graydon authored
158 if str::len(line) == 0u { again; }
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
159
// dispatch on (first byte of the line, whether the target record has started)
160 alt (line[0], proc_mode) {
161
162 // start processing if this is the one
163 ('>' as u8, false) {
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
164 alt str::find_str_from(line, ~"THREE", 1u) {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
165 option::some(_) { proc_mode = true; }
166 option::none { }
167 }
168 }
169
170 // break our processing
// (a header line seen while proc_mode is set ends the reading loop)
171 ('>' as u8, true) { break; }
172
173 // process the sequence for k-mers
174 (_, true) {
175 let line_bytes = str::bytes(line);
176
d1fc2b5 @brson Convert to new closure syntax
brson authored
177 for sizes.eachi |ii, _sz| {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
// NOTE(review): each worker gets its own copy of the line; the copy is
// presumably declared `mut` so it can be moved into send -- confirm
178 let mut lb = line_bytes;
179 comm::send(to_child[ii], lb);
180 }
181 }
182
183 // whatever
184 _ { }
185 }
186 }
187
188 // finish...
// an empty vector is what make_sequence_processor treats as end of input
d1fc2b5 @brson Convert to new closure syntax
brson authored
189 for sizes.eachi |ii, _sz| {
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes #...
msullivan authored
190 comm::send(to_child[ii], ~[]);
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
191 }
192
193 // now fetch and print result messages
d1fc2b5 @brson Convert to new closure syntax
brson authored
194 for sizes.eachi |ii, _sz| {
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
195 io::println(comm::recv(from_child[ii]));
196 }
197 }
198
Something went wrong with that request. Please try again.