|
1 | 1 | # The Computer Language Benchmarks Game
|
2 | 2 | #
|
3 | 3 | # Based on the submission for Perl 5.
|
4 |
| -# contributed by Daniel carrera |
| 4 | +# originally contributed by Daniel carrera |
5 | 5 | #
|
6 | 6 | # USAGE: perl6 regex-dna.p6.pl < regex-dna.input
|
7 | 7 |
|
8 |
| -my $content = $*IN.slurp; |
9 |
| -my $len_file = $content.chars; |
| 8 | +use v6; |
10 | 9 |
|
11 |
| -$content .= subst(/ (^^ \> \N*)? \n/, '', :global); |
12 |
| -$content = lc $content; |
13 |
| -my $len_code = $content.chars; |
| 10 | +my $input = slurp; |
| 11 | +my $data = $input.lines.grep({ $_ !~~ /^ \>/}).join.lc; |
14 | 12 |
|
15 |
| -my @seq = ( 'agggtaaa|tttaccct', |
16 |
| - '[cgt]gggtaaa|tttaccc[acg]', |
17 |
| - 'a[act]ggtaaa|tttacc[agt]t', |
18 |
| - 'ag[act]gtaaa|tttac[agt]ct', |
19 |
| - 'agg[act]taaa|ttta[agt]cct', |
20 |
| - 'aggg[acg]aaa|ttt[cgt]ccct', |
21 |
| - 'agggt[cgt]aa|tt[acg]accct', |
22 |
| - 'agggta[cgt]a|t[acg]taccct', |
23 |
| - 'agggtaa[cgt]|[acg]ttaccct' ); |
24 |
| -my @regex = ( /agggtaaa|tttaccct/, |
25 |
| - /<[cgt]>gggtaaa|tttaccc<[acg]>/, |
26 |
| - /a<[act]>ggtaaa|tttacc<[agt]>t/, |
27 |
| - /ag<[act]>gtaaa|tttac<[agt]>ct/, |
28 |
| - /agg<[act]>taaa|ttta<[agt]>cct/, |
29 |
| - /aggg<[acg]>aaa|ttt<[cgt]>ccct/, |
30 |
| - /agggt<[cgt]>aa|tt<[acg]>accct/, |
31 |
| - /agggta<[cgt]>a|t<[acg]>taccct/, |
32 |
| - /agggtaa<[cgt]>|<[acg]>ttaccct/ ); |
| 13 | +say $_ ~ ' ' ~ +$data.comb($_) for |
| 14 | + /agggtaaa|tttaccct/ but 'agggtaaa|tttaccct', |
| 15 | + /<[cgt]>gggtaaa|tttaccc<[acg]>/ but '[cgt]gggtaaa|tttaccc[acg]', |
| 16 | + /a<[act]>ggtaaa|tttacc<[agt]>t/ but 'a[act]ggtaaa|tttacc[agt]t', |
| 17 | + /ag<[act]>gtaaa|tttac<[agt]>ct/ but 'ag[act]gtaaa|tttac[agt]ct', |
| 18 | + /agg<[act]>taaa|ttta<[agt]>cct/ but 'agg[act]taaa|ttta[agt]cct', |
| 19 | + /aggg<[acg]>aaa|ttt<[cgt]>ccct/ but 'aggg[acg]aaa|ttt[cgt]ccct', |
| 20 | + /agggt<[cgt]>aa|tt<[acg]>accct/ but 'agggt[cgt]aa|tt[acg]accct', |
| 21 | + /agggta<[cgt]>a|t<[acg]>taccct/ but 'agggta[cgt]a|t[acg]taccct', |
| 22 | + /agggtaa<[cgt]>|<[acg]>ttaccct/ but 'agggtaa[cgt]|[acg]ttaccct'; |
33 | 23 |
|
34 |
| -my @cnt = (0) xx @seq; |
35 |
| -for @seq.keys -> $k { |
36 |
| - for $content.comb(@regex[$k]) { @cnt[$k]++ } |
37 |
| - say @seq[$k] ~ " " ~ @cnt[$k]; |
38 |
| -} |
| 24 | +say; |
39 | 25 |
|
40 |
| -my %iub = 'b' => '(c|g|t)', 'd' => '(a|g|t)', 'h' => '(a|c|t)', |
41 |
| - 'k' => '(g|t)', 'm' => '(a|c)', 'n' => '(a|c|g|t)', |
42 |
| - 'r' => '(a|g)', 's' => '(c|g)', 'v' => '(a|c|g)', |
43 |
| - 'w' => '(a|t)', 'y' => '(c|t)'; |
| 26 | +my %iub = 'b' => '(c|g|t)', 'd' => '(a|g|t)', 'h' => '(a|c|t)', |
| 27 | + 'k' => '(g|t)', 'm' => '(a|c)', 'n' => '(a|c|g|t)', |
| 28 | + 'r' => '(a|g)', 's' => '(c|g)', 'v' => '(a|c|g)', |
| 29 | + 'w' => '(a|t)', 'y' => '(c|t)'; |
44 | 30 |
|
45 |
| -$content .= subst(/(<[bdhkmnrsvwy]>)/, -> $/ { %iub{$0} }, :global); |
46 |
| - |
47 |
| -say "\n$len_file\n$len_code\n" ~ $content.chars; |
| 31 | +my $output = $data.subst(/(<[bdhkmnrsvwy]>)/, { %iub{$_} }, :g); |
48 | 32 |
|
| 33 | +.chars.say for $input, $data, $output; |
0 commit comments