Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Shootout: Initial commit + regex-dna benchmark.

  • Loading branch information...
commit 882efd09f6028902a028be3d6ce3bd151514d086 1 parent 9f1cc0b
authored May 27, 2009
1,671  shootout/regex-dna.input
1671 additions, 0 deletions not shown
50  shootout/regex-dna.p5.pl
... ...
@@ -0,0 +1,50 @@
  1
+# The Computer Language Benchmarks Game
  2
+# http://shootout.alioth.debian.org/
  3
+# contributed by Danny Sauer
  4
+# completely rewritten and
  5
+# cleaned up for speed and fun by Mirco Wahab
  6
+# improved STDIN read, regex clean up by Jake Berner
  7
+
  8
+use strict;
  9
+use warnings;
  10
+
  11
+my $l_file  = -s STDIN;
  12
+my $content; read STDIN, $content, $l_file;
  13
+# this is significantly faster than using <> in this case
  14
+
  15
+my $dispose =  qr/(^>.*)?\n/m; # slight performance gain here
  16
+   $content =~ s/$dispose//g;
  17
+my $l_code  =  length $content;
  18
+
  19
+my @seq = ( 'agggtaaa|tttaccct',
  20
+        '[cgt]gggtaaa|tttaccc[acg]',
  21
+        'a[act]ggtaaa|tttacc[agt]t',
  22
+        'ag[act]gtaaa|tttac[agt]ct',
  23
+        'agg[act]taaa|ttta[agt]cct',
  24
+        'aggg[acg]aaa|ttt[cgt]ccct',
  25
+        'agggt[cgt]aa|tt[acg]accct',
  26
+        'agggta[cgt]a|t[acg]taccct',
  27
+        'agggtaa[cgt]|[acg]ttaccct' );
  28
+
  29
+my @cnt = (0) x @seq;
  30
+for my $k (0..$#seq) {
  31
+  ++$cnt[$k] while $content=~/$seq[$k]/gi;
  32
+  printf "$seq[$k] $cnt[$k]\n"
  33
+}
  34
+
  35
+my %iub = (         B => '(c|g|t)',  D => '(a|g|t)',
  36
+  H => '(a|c|t)',   K => '(g|t)',    M => '(a|c)',
  37
+  N => '(a|c|g|t)', R => '(a|g)',    S => '(c|g)',
  38
+  V => '(a|c|g)',   W => '(a|t)',    Y => '(c|t)' );
  39
+
  40
+# using $& and no submatch marginally improves the
  41
+# speed here, but mentioning $& causes perl to
  42
+# define that value for the @seq patterns too, which
  43
+# slows those down considerably. No change.
  44
+
  45
+my $findiub = '(['.(join '', keys %iub).'])';
  46
+
  47
+$content =~ s/$findiub/$iub{$1}/g;
  48
+
  49
+printf "\n%d\n%d\n%d\n", $l_file, $l_code, length $content;
  50
+
46  shootout/regex-dna.p6.pl
... ...
@@ -0,0 +1,46 @@
  1
+# The Computer Language Benchmarks Game
  2
+# 
  3
+# Based on the submission for Perl 5.
  4
+# contributed by Daniel carrera
  5
+
  6
+my $content = $*IN.slurp;
  7
+my $len_file = $content.chars;
  8
+
  9
+$content .= subst(/ (^^ \> \N*)? \n/, '', :global);
  10
+$content = lc $content;
  11
+my $len_code = $content.chars;
  12
+
  13
+my @seq = ( 'agggtaaa|tttaccct',
  14
+        '[cgt]gggtaaa|tttaccc[acg]',
  15
+        'a[act]ggtaaa|tttacc[agt]t',
  16
+        'ag[act]gtaaa|tttac[agt]ct',
  17
+        'agg[act]taaa|ttta[agt]cct',
  18
+        'aggg[acg]aaa|ttt[cgt]ccct',
  19
+        'agggt[cgt]aa|tt[acg]accct',
  20
+        'agggta[cgt]a|t[acg]taccct',
  21
+        'agggtaa[cgt]|[acg]ttaccct' );
  22
+my @regex = ( /agggtaaa|tttaccct/,
  23
+        /<[cgt]>gggtaaa|tttaccc<[acg]>/,
  24
+        /a<[act]>ggtaaa|tttacc<[agt]>t/,
  25
+        /ag<[act]>gtaaa|tttac<[agt]>ct/,
  26
+        /agg<[act]>taaa|ttta<[agt]>cct/,
  27
+        /aggg<[acg]>aaa|ttt<[cgt]>ccct/,
  28
+        /agggt<[cgt]>aa|tt<[acg]>accct/,
  29
+        /agggta<[cgt]>a|t<[acg]>taccct/,
  30
+        /agggtaa<[cgt]>|<[acg]>ttaccct/ );
  31
+
  32
+my @cnt = (0) xx @seq;
  33
+for @seq.keys -> $k {
  34
+	for $content.comb(@regex[$k]) { @cnt[$k]++ }
  35
+	say @seq[$k] ~ " " ~ @cnt[$k];
  36
+}
  37
+
  38
+my %iub = 	'b' => '(c|g|t)',	'd' => '(a|g|t)',	'h' => '(a|c|t)',
  39
+			'k' => '(g|t)',		'm' => '(a|c)',		'n' => '(a|c|g|t)',
  40
+			'r' => '(a|g)',		's' => '(c|g)',		'v' => '(a|c|g)',
  41
+			'w' => '(a|t)',		'y' => '(c|t)';
  42
+
  43
+$content .= subst(/(<[bdhkmnrsvwy]>)/, {%iub{$0}}, :global);
  44
+
  45
+say "\n$len_file\n$len_code\n" ~ $content.chars;
  46
+

0 notes on commit 882efd0

Please sign in to comment.
Something went wrong with that request. Please try again.