Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 196 lines (152 sloc) 5.212 kb
748bd12 @brson bench: xfail-pretty shootout-k-nucleotide
brson authored
1 // xfail-pretty
2
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
3 // multi tasking k-nucleotide
4
5 import io::reader_util;
6
7 use std;
8 import std::map;
9 import std::map::hashmap;
10 import std::sort;
11
12 // given a map, print a sorted version of it
//
// Note: nothing is written to stdout here; the formatted table is returned
// as a string. Each map entry becomes one "KEY percentage\n" line, where the
// percentage is the entry's count relative to `total`.
//
//   mm    - map from k-mer bytes to occurrence count
//   total - denominator used when converting counts to percentages
13 fn sort_and_fmt(mm: hashmap<[u8], uint>, total: uint) -> str {
// xx expressed as a percentage of yy
14 fn pct(xx: uint, yy: uint) -> float {
15 ret (xx as float) * 100f / (yy as float);
16 }
17
// Comparison on the second tuple element. Despite the "le" in the name the
// test is `>=`, so sorting with it puts the largest value first.
18 fn le_by_val<TT: copy, UU: copy>(kv0: (TT,UU), kv1: (TT,UU)) -> bool {
19 let (_, v0) = kv0;
20 let (_, v1) = kv1;
21 ret v0 >= v1;
22 }
23
// Comparison on the first tuple element (`<=`, i.e. ascending keys).
24 fn le_by_key<TT: copy, UU: copy>(kv0: (TT,UU), kv1: (TT,UU)) -> bool {
25 let (k0, _) = kv0;
26 let (k1, _) = kv1;
27 ret k0 <= k1;
28 }
29
30 // sort by key, then by value
// NOTE(review): keeping equal values in key order relies on
// sort::merge_sort being stable -- confirm against the std::sort docs.
31 fn sortKV<TT: copy, UU: copy>(orig: [(TT,UU)]) -> [(TT,UU)] {
32 ret sort::merge_sort(le_by_val, sort::merge_sort(le_by_key, orig));
33 }
34
35 let mut pairs = [];
36
37 // map -> [(k,%)]
38 mm.each(fn&(key: [u8], val: uint) -> bool {
39 pairs += [(key, pct(val, total))];
40 ret true;
41 });
42
43 let pairs_sorted = sortKV(pairs);
44
45 let mut buffer = "";
46
// Append one line per pair: upper-cased key, a space, and the percentage
// formatted with "%0.3f".
47 pairs_sorted.each(fn&(kv: ([u8], float)) -> bool unsafe {
48 let (k,v) = kv;
49 buffer += (#fmt["%s %0.3f\n", str::to_upper(str::unsafe::from_bytes(k)), v]);
50 ret true;
51 });
52
53 ret buffer;
54 }
55
56 // given a map, search for the frequency of a pattern
57 fn find(mm: hashmap<[u8], uint>, key: str) -> uint {
58 alt mm.find(str::bytes(str::to_lower(key))) {
59 option::none { ret 0u; }
60 option::some(num) { ret num; }
61 }
62 }
63
64 // given a map, increment the counter for a key
65 fn update_freq(mm: hashmap<[u8], uint>, key: [u8]) {
66 alt mm.find(key) {
67 option::none { mm.insert(key, 1u ); }
68 option::some(val) { mm.insert(key, 1u + val); }
69 }
70 }
71
72 // given a [u8], for each window call a function
73 // i.e., for "hello" and windows of size four,
74 // run it("hell") and it("ello"), then return "llo"
75 fn windows_with_carry(bb: [const u8], nn: uint, it: fn(window: [u8])) -> [u8] {
76 let mut ii = 0u;
77
78 let len = vec::len(bb);
79 while ii < len - (nn - 1u) {
80 it(vec::slice(bb, ii, ii+nn));
81 ii += 1u;
82 }
83
84 ret vec::slice(bb, len - (nn - 1u), len);
85 }
86
// Body of one counting task.
//
// Receives chunks of sequence bytes on `from_parent` until an empty vector
// arrives, counting every `sz`-byte window along the way (the tail of each
// chunk is carried over and prepended to the next one). When the input
// ends, exactly one result string is sent back on `to_parent`.
//
//   sz          - window (k-mer) length this task is responsible for
//   from_parent - port delivering sequence chunks; [] marks end of input
//   to_parent   - channel on which the formatted result is sent
87 fn make_sequence_processor(sz: uint, from_parent: comm::port<[u8]>, to_parent: comm::chan<str>) {
88
89 let freqs: hashmap<[u8], uint> = map::bytes_hash();
90 let mut carry: [u8] = [];
91 let mut total: uint = 0u;
92
93 let mut line: [u8];
94
95 loop {
96
97 line = comm::recv(from_parent);
// an empty vector is the end-of-input marker
98 if line == [] { break; }
99
// count every window in carry + line; `total` tracks how many windows
// were seen overall and is the denominator for the percentage tables
100 carry = windows_with_carry(carry + line, sz, { |window|
101 update_freq(freqs, window);
102 total += 1u;
103 });
104 }
105
// Sizes 1 and 2 report the full sorted percentage table; sizes 3, 4, 6, 12
// and 18 report the count of one fixed pattern of that length; any other
// size produces an empty result.
106 let buffer = alt sz {
107 1u { sort_and_fmt(freqs, total) }
108 2u { sort_and_fmt(freqs, total) }
109 3u { #fmt["%u\t%s", find(freqs, "GGT"), "GGT"] }
110 4u { #fmt["%u\t%s", find(freqs, "GGTA"), "GGTA"] }
111 6u { #fmt["%u\t%s", find(freqs, "GGTATT"), "GGTATT"] }
112 12u { #fmt["%u\t%s", find(freqs, "GGTATTTTAATT"), "GGTATTTTAATT"] }
113 18u { #fmt["%u\t%s", find(freqs, "GGTATTTTAATTTATAGT"), "GGTATTTTAATTTATAGT"] }
114 _ { "" }
115 };
116
117 //comm::send(to_parent, #fmt["yay{%u}", sz]);
118 comm::send(to_parent, buffer);
119 }
120
121 // given a FASTA file on stdin, process sequence THREE
//
// One task is spawned per window size. Every sequence line belonging to the
// record whose header contains "THREE" is sent to all of the tasks; after
// the input ends each task is sent an empty vector and its single result
// string is printed, in the order of `sizes`.
122 fn main(args: [str]) {
// Input source: the bundled data file when RUST_BENCH is set in the
// environment, otherwise stdin.
123 let rdr = if os::getenv("RUST_BENCH").is_some() {
03e186c @brson bench: Modify shootout-k-nucleotide to not read from the filesystem
brson authored
124 // FIXME: Using this compile-time env variable is a crummy way to
13d4b61 @catamorphism Comments only: annotate FIXMEs in tests
catamorphism authored
125 // get to this massive data set, but #include_bin chokes on it (#2598)
03e186c @brson bench: Modify shootout-k-nucleotide to not read from the filesystem
brson authored
126 let path = path::connect(
127 #env("CFG_SRC_DIR"),
128 "src/test/bench/shootout-k-nucleotide.data"
129 );
130 result::get(io::file_reader(path))
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
131 } else {
132 io::stdin()
133 };
134
135
136
137 // initialize each sequence sorter
// from_child[i] / to_parent[i]: port and matching channel on which the
// task for sizes[i] reports its result.
// to_child[i]: channel returned by spawn_listener, used to feed sequence
// bytes to the task for sizes[i].
138 let sizes = [1u,2u,3u,4u,6u,12u,18u];
139 let from_child = vec::map (sizes, { |_sz| comm::port() });
140 let to_parent = vec::mapi(sizes, { |ii, _sz| comm::chan(from_child[ii]) });
141 let to_child = vec::mapi(sizes, fn@(ii: uint, sz: uint) -> comm::chan<[u8]> {
142 ret task::spawn_listener { |from_parent|
143 make_sequence_processor(sz, from_parent, to_parent[ii]);
144 };
145 });
146
147
148 // latch stores true after we've started
149 // reading the sequence of interest
150 let mut proc_mode = false;
151
152 while !rdr.eof() {
153 let line: str = rdr.read_line();
154
// skip empty lines (also keeps line[0] below in bounds)
155 if str::len(line) == 0u { cont; }
156
// dispatch on (first byte of the line, whether we are inside the record)
157 alt (line[0], proc_mode) {
158
159 // start processing if this is the one
160 ('>' as u8, false) {
161 alt str::find_str_from(line, "THREE", 1u) {
162 option::some(_) { proc_mode = true; }
163 option::none { }
164 }
165 }
166
167 // break our processing
168 ('>' as u8, true) { break; }
169
170 // process the sequence for k-mers
171 (_, true) {
172 let line_bytes = str::bytes(line);
173
// every task receives the same line
// NOTE(review): `lb` is declared mut but is not modified here
174 for sizes.eachi { |ii, _sz|
175 let mut lb = line_bytes;
176 comm::send(to_child[ii], lb);
177 }
178 }
179
180 // whatever
181 _ { }
182 }
183 }
184
185 // finish...
// an empty vector tells each task that the input is complete
186 for sizes.eachi { |ii, _sz|
187 comm::send(to_child[ii], []);
188 }
189
190 // now fetch and print result messages
// results are received in the order of `sizes`, one per task
191 for sizes.eachi { |ii, _sz|
192 io::println(comm::recv(from_child[ii]));
193 }
194 }
195
Something went wrong with that request. Please try again.