Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100755 708 lines (634 sloc) 28.307 kb
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
1 #!/usr/bin/perl
2
3 # Copyright (C) 2004-2011, The Perl Foundation.
4
5 ## The "make spectest" target tells us how many tests we failed
6 ## (hopefully zero!), but doesn't say how many were actually passed.
7 ## This script runs the spectest tests and summarizes
8 ## passed, failed, todoed, skipped, executed and planned test results.
9 ##
10 ## Usage:
39fc591 Stefan O'Rear Extend test_summary to support an --archive option accepting TAP::Harnes...
sorear authored
11 ## tools/test_summary [--timing | --view] [--archive tap_archive.tar.gz] <implementation> [testlist]
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
12 ##
13 ## The --timing option enables microsecond timing per test saved
14 ## in docs/test_summary.times.
15 ## The --view option renders docs/test_summary.times in various reports
16 ## If supplied, C<testlist> identifies an alternate list of tests
17 ## to use (e.g., t/localtest.data).
18
7f5a8bb Will Coleda Note new rakudo requirement
coke authored
19 ## Rakudo Note:
20 ## Using this script with rakudo requires setting the PERL6LIB env var
21 ## to point to its lib directory so we can find Test.pm
22
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
23 use strict;
24 use warnings;
25 use Time::Local;
26 use Time::HiRes;
27 use Getopt::Long;
28
29 my $timing;
30 my $view;
39fc591 Stefan O'Rear Extend test_summary to support an --archive option accepting TAP::Harnes...
sorear authored
31 my $archive;
32 unless (GetOptions('timing' => \$timing, 'view' => \$view, 'archive:s' => \$archive)) {
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
33 die "$0 cannot handle the unknown option\n";
34 }
35 if ($view) { Simple::Relative::Benchmarking::view(); exit(0); }
36
37 my $benchmark;
38 # Comment out the next line to skip benchmarking; see docs below
39 $benchmark = Simple::Relative::Benchmarking::begin() if $timing;
40
41 # Which implementation are we running?
42 my $implementation = $ARGV[0] ||
43 die "Must specify an implementation";
44
39fc591 Stefan O'Rear Extend test_summary to support an --archive option accepting TAP::Harnes...
sorear authored
45 my $impl_class = $archive ? 'ArchiveRunner' : 'RealRunner';
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
46
47 # Put test names in %tname, with the 't/spec/' removed from the start
48 # and truncated to 49 characters. Keep track of the maximum name length.
39fc591 Stefan O'Rear Extend test_summary to support an --archive option accepting TAP::Harnes...
sorear authored
49 my @tfiles = $impl_class->get_test_names;
50 my %tname;
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
51 @tfiles = sort @tfiles;
52 my $max = 0;
53 for my $tfile (@tfiles) {
54 my $tname = $tfile;
55 $tname =~ s{^t/spec/}{};
56 $tname = substr($tname, 0, 49);
57 if (length($tname) > $max) {
58 $max = length($tname);
59 }
60 $tname{$tfile} = $tname;
61 }
62
63 # Prepare arrays and hashes to gather and accumulate test statistics
64 my @col = qw(pass fail todo skip plan spec);
916b24e Will Coleda We now have tests for S22
coke authored
65 my @syn = qw(S01 S02 S03 S04 S05 S06 S07 S09 S10 S11 S12 S13 S14 S15 S16 S17 S19 S22 S24 S26 S28 S29 S32 int ros);
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
66 my %syn; # number of test scripts per Synopsis
67 my %sum; # total pass/fail/todo/skip/test/plan per Synposis
68 my $syn;
69 for $syn (@syn) {
70 $syn{$syn} = 0;
71 for my $col (@col) {
72 $sum{"$syn-$col"} = 0;
73 }
74 }
75 $syn = ''; # to reliably trigger the display of column headings
76
77 # Execute all test scripts, aggregate the results, display the failures
78 $| = 1;
79 my ( @fail, @plan_hint );
80 my %plan_per_file;
81 for my $tfile (@tfiles) {
82 my $th;
83 open($th, '<', $tfile) || die "Can't read $tfile: $!\n";
84 my ($pass,$fail,$todo,$skip,$plan,$abort,$bonus) = (0,0,0,0,0,0,0);
85 my $no_plan = 0; # planless works, but is unhelpful for statistics
86 # http://www.shadowcat.co.uk/blog/matt-s-trout/a-cunning-no_plan/
87 while (<$th>) { # extract the number of tests planned
88 if (/^\s*plan\D*(\d+)/) { $plan = $1; last; }
89 elsif (/^\s*plan\s+\*;/) { $no_plan = 1; last; }
90 }
91 close $th or die $!;
92 my $tname = $tname{$tfile};
93 # Repeat the column headings at the start of each Synopsis
94 if ( $syn ne substr($tname, 0, 3) ) {
95 $syn = substr($tname, 0, 3);
96 printf( "%s pass fail todo skip plan\n", ' ' x $max );
97 unless ( exists $syn{$syn} ) {
98 push @fail, "note: test_summary.pl \@syn does not have $syn";
99 }
100 }
101 $syn{$syn}++;
102 printf "%s%s..", $tname, '.' x ($max - length($tname));
39fc591 Stefan O'Rear Extend test_summary to support an --archive option accepting TAP::Harnes...
sorear authored
103 my @results = split /\n/, $impl_class->run_test($tfile);
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
104 my (%skip, %todopass, %todofail);
105 my ($time1, $time2, $testnumber, $test_comment ) = ( 0, 0, 0, '' );
106 my @times = (); my @comments = ();
107 for (@results) {
108 # Pass over the optional line containing "1..$planned"
109 if (/^1\.\.(\d+)/) { $plan = $1 if $1 > 0; next; }
110 # Handle lines containing test times
111 if (/^# t=(\d+)/) {
112 my $microseconds = $1;
113 if ( $testnumber > 0 ) {
114 # Do this only if the time was after a test result
115 $times[ $testnumber] = $microseconds;
116 $comments[$testnumber] = $test_comment;
117 $testnumber = 0; # must see require another "ok $n" first
118 }
119 next;
120 }
121 # Ignore lines not beginning with "ok $$test" or "not ok $test"
122 next unless /^(not )?ok +(\d+)/;
123 if (/#\s*SKIP\s*(.*)/i) { $skip++; $skip{$1}++; }
124 elsif (/#\s*TODO\s*(.*)/i) { $todo++;
125 my $reason = $1;
126 if (/^ok /) { $todopass{$reason}++ }
127 else { $todofail{$reason}++ }
128 }
129 elsif (/^not ok +(.*)/) { $fail++; push @fail, "$tname $1"; }
130 elsif (/^ok +(\d+) - (.*)$/) {
131 $pass++; $testnumber = $1; $test_comment = $2;
132 }
5ee36f4 Solomon Foster Tests which have no description still have passed, honest.
colomon authored
133 elsif (/^ok +(\d+)$/) {
134 $pass++; $testnumber = $1; $test_comment = "";
135 }
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
136 }
137 my $test = $pass + $fail + $todo + $skip;
138 if ($plan > $test) {
139 $abort = $plan - $test;
140 $fail += $abort;
141 push @fail, "$tname aborted $abort test(s)";
142 }
143 elsif ($plan < $test) {
144 $bonus = $test - $plan;
145 push @fail, "$tname passed $bonus unplanned test(s)";
146 }
147 if ($no_plan) {
148 push @plan_hint, "'plan *;' could become 'plan $plan;' in $tname";
149 }
150 printf "%4d %4d %4d %4d %4d\n",
151 $pass, $fail, $todo, $skip, $plan;
152 $sum{'pass'} += $pass; $sum{"$syn-pass"} += $pass;
153 $sum{'fail'} += $fail; $sum{"$syn-fail"} += $fail;
154 $sum{'todo'} += $todo; $sum{"$syn-todo"} += $todo;
155 $sum{'skip'} += $skip; $sum{"$syn-skip"} += $skip;
156 $sum{'plan'} += $plan; $sum{"$syn-plan"} += $plan;
157 {
158 my $f = $tfile;
159 $f =~ s/\.$implementation$/.t/;
160 $plan_per_file{$f} = $plan;
161 }
162 for (keys %skip) {
163 printf " %3d skipped: %s\n", $skip{$_}, $_;
164 }
165 for (keys %todofail) {
166 printf " %3d todo : %s\n", $todofail{$_}, $_;
167 }
168 for (keys %todopass) {
169 printf " %3d todo PASSED: %s\n", $todopass{$_}, $_;
170 }
171 if ($abort) {
172 printf " %3d tests aborted (missing ok/not ok)\n", $abort;
173 }
174 if ($bonus) {
175 printf " %3d tests more than planned were run\n", $bonus;
176 }
177 defined $benchmark && $benchmark->log_script_times($tfile,\@times,\@comments);
178 } # for my $tfile (@tfiles)
179 defined $benchmark && $benchmark->end(); # finish simple relative benchmarking
180
181 # Calculate plan totals from test scripts grouped by Synopsis and overall.
182 # This ignores any test list and processes all unfudged files in t/spec/.
183 # Implementing 'no_plan' or 'plan *' in test scripts makes this total
184 # inaccurate.
185 for my $syn (sort keys %syn) {
186 my $grepcmd = "grep ^plan t/spec/$syn*/* -rHn"; # recurse, always say filename, include line number for troubleshooting
187 my @grep_output = `$grepcmd`; # gets an array of all the plan lines
188 my $total_tests_planned_per_synopsis = 0;
189 for (@grep_output) {
190 # Most test scripts have a conventional 'plan 42;' or so near
191 # the beginning which is what we need. Unfortunately some have
192 # 'plan $x*$y;' or so, which we cannot dynamically figure out.
193
194 # Example grep output: t/spec/S02-names/our.t:4:plan 10;
195 # Extract the filename and plan count from that if possible.
196 if ( m/ ^ ([^:]*) : \d+ : plan (.*) $ /x ) {
197 my ( $filename, $planexpression ) = ( $1, $2 );
198 my $script_planned_tests = 0;
199 if ( $filename =~ m/\.t$/ ) {
200 if ( $planexpression =~ m/ ^ \s* (\d+) \s* ; $ /x ) {
201 # A conventional 'plan 42;' type of line
202 $script_planned_tests = $1;
203 }
204 else {
205 # It is some other plan argument, either * or variables.
206 # A workaround is to get the actual number of tests run
207 # from the output and just assume is the same number,
208 # but sometimes that is missing too.
209 if ( exists $plan_per_file{$filename} ) {
210 $script_planned_tests = $plan_per_file{$filename};
211 }
212 }
213 }
214 $total_tests_planned_per_synopsis += $script_planned_tests;
215 }
216 }
217 $sum{"$syn-spec"} = $total_tests_planned_per_synopsis;
218 $sum{'spec'} += $total_tests_planned_per_synopsis;
219 }
220
221 # Planless testing (eg 'plan *;') is useless for static analysis, making
222 # tools jump through hoops to calculate the number of planned tests.
223 # This part display hints about the scripts that could easily be edited
224 # make life easier on the reporting side.
225 # A test suite author can follow the hints and write the automatically
226 # counted number of tests into the test script, changing it back from
227 # planless to planned.
228
229 if (@plan_hint) {
230 print "----------------\n";
231 foreach (@plan_hint) {
232 print " $_\n";
233 }
234 }
235
236 # Show test totals grouped by Synopsys, followed by overall totals
237 print "----------------\n";
238 my $sumfmt = qq(%-11.11s %6s,%6s,%6s,%6s,%6s,%6s\n);
239 printf $sumfmt, qq{"Synopsis",}, map { qq{"$_"} } @col;
240 for my $syn (sort keys %syn) {
241 printf $sumfmt, qq{"$syn",}, map { $sum{"$syn-$_"} } @col;
242 }
243 my $total = scalar(@tfiles).' regression files';
244 printf $sumfmt, qq{"total",}, map { $sum{$_} } @col;
245 print "----------------\n";
246
247 # Optionally show the statistics that can be manually appended to
248 # docs/spectest-progress.csv
249 if ($ENV{'REV'}) {
250 my @gmt = gmtime;
251 my $testdate = sprintf '"%4d-%02d-%02d %02d:%02d"', $gmt[5]+1900,
252 $gmt[4]+1, $gmt[3], $gmt[2], $gmt[1];
253 my $filecount = scalar(@tfiles);
254 my $passpercent = 100 * $sum{'pass'} / $sum{'spec'};
255 print join(',', $ENV{'REV'}, (map { $sum{$_} } @col),
256 $filecount), "\n";
257 printf "spectest-progress.csv update: " .
258 "%d files, %d (%.1f%% of %d) pass, %d fail\n",
259 $filecount, $sum{'pass'}, $passpercent, $sum{'spec'}, $sum{'fail'};
260 }
261
262 # List descriptions of the tests that failed
263 if (@fail) {
264 print "Failure summary:\n";
265 foreach (@fail) {
266 print "$_\n";
267 }
268 }
269 else {
270 print "No failures!\n";
271 }
272
273 # End of main program
274
275 #-------------------- Simple Relative Benchmarking ---------------------
276
277 package Simple::Relative::Benchmarking;
278
279 # begin
280 # Initialize simple relative benchmarking. Called before the first test
281 sub begin {
282 my $timings = shift || 5; # number of timings to keep (default 5)
283 my $self = {};
284 my @test_history;
285 $self->{'Timings'} = $timings;
286 $self->{'Last_test_loaded'} = '';
287
288 if ( open( $self->{'file_in'}, '<', 'docs/test_summary.times') ) {
289 my $file_in = $self->{'file_in'};
290 my $line = <$file_in>; chomp $line;
291 if ( $line =~ m/{"test_.+":\[/i ) { # should be Test_history
292 $line = <$file_in>; chomp $line;
293 while ( $line =~ m/\s\s(.+\d\d\d\d-\d\d-\d\d.\d\d:\d\d:\d\d.+)/ ) {
294 my $history_line = $1;
295 $history_line =~ s/,$//; # trim possible trailing comma
296 push @test_history, $history_line;
297 $line = <$file_in>; chomp $line;
298 } # ends on the ' ],' line after the test_history
299 $line = <$file_in>; chomp $line;
300 # if ( $line =~ m/ "test_microseconds":{/i ) {
301 # warn "begin reached 'test_microseconds'\n";
302 # }
303 }
304 }
305 open( $self->{'file_out'}, '>', 'docs/test_summary.times.tmp') or die "cannot create docs/test_summary.times.tmp: $!";
e484df9 lizmat Fix deprecated use of $*VM<config>
lizmat authored
306 my $parrot_version = qx{./perl6 -e'print \$*VM.config<revision>'};
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
307 my $impl_version = qx{git log --pretty=oneline --abbrev-commit --max-count=1 .}; chomp $impl_version;
308 $impl_version =~ s/^([0-9a-f])+\.\.\./$1/; # delete possible ...
309 $impl_version =~ s/\\/\\\\/g; # escape all backslashes
310 $impl_version =~ s/\"/\\\"/g; # escape all double quotes
311 my $file_out = $self->{'file_out'};
312 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(time());
313 push @test_history, sprintf("[\"%4d-%02d-%02d %02d:%02d:%02d\",%d,\"%s\"]",
314 $year+1900, $mon+1, $mday, $hour, $min, $sec,
315 $parrot_version, $impl_version );
316 # Delete the oldest test test_history if there are too many.
317 while ( @test_history > $self->{'Timings'} ) { shift @test_history; }
318 print $file_out qq!{"test_history":[\n!;
319 print $file_out " " . join(",\n ",@test_history) . "\n ],\n";
320 print $file_out qq! "test_microseconds":{!;
321 # tell Test.pm to output per-test timestamps
322 $ENV{'PERL6_TEST_TIMES'} = 'true';
323 return bless $self;
324 }
325
326 # Track simple relative benchmarking. Called after running each test script.
327 sub log_script_times {
328 my $self = shift;
329 my $test_name = shift;
330 my $ref_times = shift;
331 my $ref_comments = shift;
332 # Make local arrays of the execution times in microseconds, and test
333 # comments (or descriptions). Since tests are being added to and
334 # removed from the test suite, test numbers change over time. The
335 # comments are sometimes empty or duplicated, but they are the only
336 # way to correlate test results if the test suite is edited.
337 my (@times) = @$ref_times;
338 my (@comments) = @$ref_comments;
339 shift @times; # offset by 1: the first result becomes $times[0];
340 shift @comments;
341 for ( my $i=0; $i<=@times; $i++ ) {
342 if ( not defined $comments[$i] ) { $comments[$i] = ''; }
343 $comments[$i] =~ s/\\/\\\\/g; # escape all backslashes
344 $comments[$i] =~ s/\"/\\\"/g; # escape all double quotes
345 }
346 my ( $line );
347 my $file_in = $self->{'file_in'};
348 my $file_out = $self->{'file_out'};
349 $test_name =~ s{^t/spec/}{}; # eg 'S02-literals/numeric.t'
350 my $test_separator;
351 if ( $self->{'Last_test_loaded'} eq '' ) {
352 $test_separator = "\n";
353 }
354 else {
355 $test_separator = ",\n";
356 }
357 while ( not eof($file_in) and $self->{'Last_test_loaded'} lt $test_name ) {
358 $line = <$file_in>; chomp $line;
359 if ( $line =~ m/^\s\s"(.+)":.$/ ) {
360 $self->{'Last_test_loaded'} = $1;
361 }
362 }
363 my @logged_results;
364 if ( not eof($file_in) and $self->{'Last_test_loaded'} eq $test_name ) {
365 my $line = <$file_in>; chomp $line;
366 while ( not eof($file_in) and $line =~ m/^\s\s\s\[(\d+),\[(.+?)\],?/ ) {
367 my $test_number = $1;
368 my @timings = split /,/ , $2;
369 $logged_results[$test_number-1] = [ @timings ];
370 $line = <$file_in>; chomp $line;
371 }
372 }
373 my $microseconds = [];
374 my $testcount = @times;
375 for ( my $test_number=0; $test_number<$testcount; $test_number++) {
376 unless ( defined($times[$test_number]) ) { $times[$test_number] = 0; }
377 my ( @times_in_file );
a31f1e7 lizmat Fix deprecation
lizmat authored
378 if ( @{$logged_results[$test_number]} ) {
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
379 @times_in_file = ( @{$logged_results[$test_number]} );
380 }
381 push @times_in_file, $times[$test_number];
382 if ( not defined( $times_in_file[0] ) ) { shift @times_in_file; }
383 # Delete the oldest test timings if there are too many.
384 while ( @times_in_file > $self->{'Timings'} ) { shift @times_in_file; }
385 $$microseconds[$test_number] = [ @times_in_file ];
386 }
387 my $test_number = 1; # start from number 1 again
388 print $file_out
389 $test_separator .
390 qq' "$test_name":[\n' .
391 join(",\n", map {' ['.$test_number++.',['.join(',',@$_).'],"'.$comments[$test_number-2].'"]'} @$microseconds) .
392 qq'\n ]';
393 }
394
395 # Finish simple relative benchmarking. Called after the first test
396 sub end {
397 my $self = shift;
398 my $file_in = $self->{'file_in'};
399 my $file_out = $self->{'file_out'};
400 print $file_out "\n }\n}\n";
401 close $file_out or warn $!;
402 close $file_in or warn $!;
403 unlink 'docs/test_summary.times';
404 rename 'docs/test_summary.times.tmp', 'docs/test_summary.times';
405 }
406
407 # Report on simple relative benchmarking. Does the --view option
408 sub view
409 {
410 my $choice = '1'; my $choiceA; my $choiceB;
411 do {
412 my ($input, $output, $t, @timings, $script, @z, @sorted, @runs);
413 my @ordername = ('', 'sorted by time', 'sorted by %change', 'sorted by time change', 'in test order' );
414 open($input, '<', 'docs/test_summary.times') or die "$0 cannot open docs/test_summary.times\n";
415 while (<$input>) { # custom parser to avoid dependency on JSON.pm
416 # a commit identification line
417 if (/^\s\s\[\"([^"]*)\",\d+,\"([^"]*)\"/) { push @runs, { 'time'=>$1, 'comment'=>$2 }; }
418 # test script name
419 if (/^\s\s\"(.+)\":\[$/x) { $script = $1; }
420 # individual test times
421 if (/^\s\s\s\[(\d+),\[([0-9,]+)\],\"(.*)\"\],/x) {
422 unless (defined $choiceA) { $choiceB = $#runs; $choiceA = $choiceB-1; }
423 my $testnumber = $1;
424 my @times = split /,/, $2;
425 push @times, 0 while @times < 5;
426 my $testcomment = $3;
427 if ($times[$choiceA] > 0 && $times[$choiceB] > 0) {
428 push @timings, [ [@times], $testcomment, $testnumber, $script];
429 }
430 }
431 }
432 close($input);
433 @z=(); # Prepare to sort using a Schwartzian transform
434 if ($choice eq '1') { # by execution time
435 for my $t ( @timings ) { push @z, $$t[0][$choiceB]; }
436 }
437 elsif ($choice eq '2') { # by relative speedup/slowdown
438 for my $t ( @timings ) { push @z, ($$t[0][$choiceB]-$$t[0][$choiceA])/$$t[0][$choiceA]; }
439 }
440 elsif ($choice eq '3') { # by absolute speedup/slowdown
441 for my $t ( @timings ) { push @z, ($$t[0][$choiceB]-$$t[0][$choiceA]); }
442 }
443 else {
444 @sorted = @timings; # choice '4' is unsorted, meaning in order of execution
445 }
446 @sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ] if @z;
447 # Send the results to 'less' for viewing
448 open $output, ">", "/tmp/test_summary.$$" or die "$0 cannot output to 'less'\n";
449 print $output "Microseconds and relative change of spec tests $ordername[$choice]. Commits:\n";
450 print $output "A: $runs[$choiceA]{'time'} $runs[$choiceA]{'comment'}\n";
451 print $output "B: $runs[$choiceB]{'time'} $runs[$choiceB]{'comment'}\n";
452 print $output " A B Chg Test description (script#test)\n";
453 for $t (@sorted) {
454 printf $output "%6d %5d %+3.0f%% %s (%s#%d)\n", $$t[0][$choiceA], $$t[0][$choiceB],
455 ($$t[0][$choiceB]-$$t[0][$choiceA])*100/$$t[0][$choiceA], $$t[1], $$t[3], $$t[2];
456 }
457 close $output;
458 system "less --chop-long-lines /tmp/test_summary.$$";
459 do { # Prompt for user choice of sort order or commits
460 print 'view: sort by 1)time 2)%change 3)change 4)none, other 5)commits q)uit> ';
461 $choice = <STDIN>; chomp $choice;
462 if ($choice eq '5') { # choose a commit
463 for (my $r=0; $r<@runs; ++$r) {
464 print "$r: $runs[$r]{'time'} $runs[$r]{'comment'}\n";
465 }
466 print 'commit for column A: ';
467 $choiceA = <STDIN>; chomp $choiceA;
468 print 'commit for column B: ';
469 $choiceB = <STDIN>; chomp $choiceB;
470 }
471 } while index('5', $choice) >= 0; # if user chose commits, must still choose sort order
472 } while index('1234', $choice) >= 0; # if valid sort order (not 'q') then do another report
473 }
474
39fc591 Stefan O'Rear Extend test_summary to support an --archive option accepting TAP::Harnes...
sorear authored
475 package RealRunner;
476
477 sub get_test_names {
478 my ($self) = @_;
479
480 # Build the list of test scripts to run in @tfiles
481 my @tfiles;
482 my $testlist = $ARGV[1] || 't/spectest.data';
483 my $fh;
484 open($fh, '<', $testlist) || die "Can't read $testlist: $!";
485 while (<$fh>) {
486 /^ *#/ && next;
259ed8d Will Coleda FIX jvm fudgespec skip
coke authored
487 my ($specfile, $fudgespec) = split ' ', $_, 2;
39fc591 Stefan O'Rear Extend test_summary to support an --archive option accepting TAP::Harnes...
sorear authored
488 next unless $specfile;
36637c8 Will Coleda respect new flag in rakudo's specttest.data
coke authored
489 # XXX each implementation should tell us what files to use
490 next if $fudgespec =~ /conc/ && $implementation =~ /parrot/;
259ed8d Will Coleda FIX jvm fudgespec skip
coke authored
491 next if $fudgespec =~ /jvm/ && $implementation !~ /jvm/;
6d1c47c Will Coleda allow rakudo-moar to skip test files
coke authored
492 next if $fudgespec =~ /moar/ && $implementation !~ /moar/;
39fc591 Stefan O'Rear Extend test_summary to support an --archive option accepting TAP::Harnes...
sorear authored
493 push @tfiles, "t/spec/$specfile";
494 }
495 close $fh or die $!;
496
497 # Fudge any implementation specific tests by running the fudgeall script
498 {
499 my $cmd = join ' ', $^X, 't/spec/fudgeall', $implementation, @tfiles;
500 # Fudgeall prints the name of each test script, but changes the name
501 # ending to match the implementation instead of .t if tests were fudged.
502 print "$cmd\n";
503 @tfiles = split ' ', `$cmd`; # execute fudgeall, collect test names
504 }
505 @tfiles;
506 }
507
508 sub run_test {
509 my ($self, $tfile) = @_;
510
511 my $cmd = "./perl6 $tfile";
512 # Run the test, collecting all stdout in @results
513 scalar qx{$cmd};
514 }
515
516 package ArchiveRunner;
517
518 my $arc_file;
519
520 sub get_test_names {
521 require Archive::Tar;
522 $arc_file = Archive::Tar->new($archive);
523 grep /^t/, $arc_file->list_files;
524 }
525
526 sub run_test {
527 my ($self, $tfile) = @_;
528 $arc_file->get_content($tfile);
529 }
530
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
531 package main;
532
533 =pod
534
535 =head1 NAME
536
537 tools/test_summary.pl -- run spectests and make statistical reports
538
539 =head1 DESCRIPTION
540
541 This test harness written in Perl 5, runs the Perl 6 specification test
542 suite. It uses the same Test Anything Protocol (TAP) as for example
543 L<TAP::Harness>, but does not depend those modules.
544
545 The names of the tests are listed in t/spectest.data, or another file
546 whose name is passed on the command line.
547
548 =head2 OUTPUT
549
550 The harness prints the name of each test script before running it.
551 After completion it prints the total number of tests passed, failed,
552 to do, skipped, and planned. The descriptions of any tests failed,
553 skipped or left to do are also listed.
554
555 After running all the tests listed, the harness prints a set of
556 subtotals per Synopsis.
557
558 If you set the REV environment variable (with the first 7 characters of
3b41fee Will Coleda remove rakudo-specific fossils
coke authored
559 a git commit id), the harness prints an additional set of grand
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
560 totals suitable for adding to F<docs/spectest_progress.csv>.
561
562 =head1 SIMPLE RELATIVE BENCHMARKING
563
564 Too little information can mislead, hence this self deprecating title.
565 For example, these measurements overlook variation in test times
566 ('jitter'), kernel versus user process times, and measurement overheads.
567 But these results are better than no information at all.
568
569 If activated, this tool logs the most recent 5 timings in microseconds
570 in F<docs/test_summary.times> in a specific JSON format, for later
571 analysis. Measurement and logging add less than 2% to the testing time
572 and makes a log file of about 2.5MB.
573
574 =head2 Methods
575
576 =head3 begin
577
578 Accepts an optional parameter, the number of timings per test to keep.
579 Creates a temporary file for new results, and returns an object that
580 updates the log file. (F<begin> acts as the constructor).
581
582 =head3 log_script_times
583
584 Takes these parameters: test script name, reference to an array of times
585 in microseconds, reference to an array of test description strings.
586 Appends the results to the temporary log file.
587
588 =head3 end
589
590 Closes and renames the temporary log file.
591
592 =head2 Timing results file
593
594 All results are stored in F<docs/test_summary.times> in a specific JSON
595 format. With 35000 test result lines and 5 runs it occupies just under
596 2.5 MB.
597
598 Here is an example with a few semi fictitious results:
599
600 {"test_history":[
601 ["2010-05-05 10:15:45",46276,"925629d Make $x does (R1, R2) work."],
602 ["2010-05-07 08:58:07",46276,"5713af2 run two more test files"],
603 ["2010-05-08 18:08:43",46405,"ab23221 bump PARROT_REVISION"],
604 ["2010-05-09 05:53:25",46405,"c49d32b run S04-phasers/rvalues.t"],
605 ["2010-05-10 00:44:46",46405,"118f4aa Overhaul sqrt for Numeric / Real."]
606 ],
607 "test_microseconds":{
608 "S02-builtin_data_types/anon_block.rakudo":[
609 [1,[6139,7559,6440,6289,5520],"The object is-a 'Sub()'"],
610 [2,[6610,6599,6690,6580,6010],"sub { } works"]
611 ],
612 "S02-builtin_data_types/array.rakudo":[
613 [1,[9100,8889,9739,9140,9169],"for 1, 2, 3 does 3 iterations"],
614 [2,[5650,5599,6119,9819,5140],"for (1, 2, 3).item 3 iterations"],
615 [3,[3920,3770,4190,4410,3350],"for [1, 2, 3] does one iteration"]
616 ]
617 }
618 }
619
620 The "test_history" section lists the starting times for all the runs of
621 F<tools/test_summary.pl> that are recorded in the file. Then the
622 "test_microseconds" records show each test filename, possibly fudged,
623 followed by the test numbers, followed by the times obtained from each
624 run. If a test has fewer than the usual number of timings, the timings
625 will be from the most recent test runs.
626
627 The file is read and written by custom code and not a JSON module, to
628 reduce dependencies. Altering the file format might cause reading to
629 fail and could result in data loss. General purpose JSON parsers should
630 be able to read the data. For example the following ranks the tests
631 from best speedup to worst slowdown.
632
633 #!/usr/bin/perl
634 use File::Slurp qw( slurp );
635 use JSON;
636 my $log_text = slurp('docs/test_summary.times');
637 my $log = JSON->new->decode( $log_text );
638 # Flatten the data structure to a 2-D array of nonzero test times
639 my @timings;
640 my $script_hash = $$log{'test_microseconds'};
641 for my $script_name ( sort keys %$script_hash ) {
642 my $test_list = $$script_hash{$script_name};
643 for my $t ( @$test_list ) {
644 my $times_count = @{$$t[1]};
645 if ( $times_count >= 2 and ${$$t[1]}[$times_count-1] > 0 ) {
646 push @timings, [$script_name, $$t[0], $$t[2], ${$$t[1]}[$times_count-2], ${$$t[1]}[$times_count-1] ];
647 }
648 }
649 }
650 # Sort the timings into improved/worsened order with a Schwartzian transform
651 my @z; for my $t ( @timings ) { push @z, ($$t[4]-$$t[3])/$$t[4]; }
652 my @sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ];
653 # Display the results: quicker is minus, slower is plus.
654 for my $s ( @sorted ) {
655 printf "%+3.0f%% %6d %6d %s:%d:%s\n",
656 ($$s[4]-$$s[3])*100/$$s[3], $$s[3], $$s[4], $$s[0], $$s[1], $$s[2];
657 } # %change, prev-time, latest-time, script, test-num, test-desc
658
659 A second example shows another way to read the results file, and ranks
660 the tests from most to least consistent in execution time.
661
662 #!/usr/bin/perl
663 use JSON;
664 my $log_text = qx{$^X -MExtUtils::Command -e cat docs/test_summary.times};
665 my $log = JSON->new->decode( $log_text );
666 # Flatten the data structure to a 2-D array of nonzero test times
667 my @timings;
668 my $script_hash = $$log{'test_microseconds'};
669 for my $script_name ( sort keys %$script_hash ) {
670 my $test_list = $$script_hash{$script_name};
671 for my $t ( @$test_list ) {
672 my $times_count = @{$$t[1]};
673 if ( $times_count >= 2 and ${$$t[1]}[$times_count-1] > 0 ) {
674 my $min = my $max = ${$$t[1]}[0];
675 for my $i (1..$times_count-1) {
676 $min = ${$$t[1]}[$i] if $min > ${$$t[1]}[$i];
677 $max = ${$$t[1]}[$i] if $max < ${$$t[1]}[$i];
678 }
679 push @timings, [$script_name, $$t[0], $$t[2], $min, $max ] if $min > 0;
680 }
681 }
682 }
683 # Sort the timings into most/least consistent order by Schwartzian transform
684 my @z; for my $t ( @timings ) { push @z, ($$t[4]-$$t[3])/$$t[3]; }
685 my @sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ];
686 # Display the results from most to least consistent
687 for my $s ( @sorted ) {
688 printf "%3.1f%% %6d %6d %s:%d:%s\n",
689 ($$s[4]-$$s[3])*100/$$s[3], $$s[3], $$s[4], $$s[0], $$s[1], $$s[2];
690 } # %difference, min-time, max-time, script, test-num, test-desc
691
692 =head2 TODO
693
694 Detect changes in number of tests or descriptions of tests in each
695 test script, and discard all previous results for that script if there
696 has been a change. Consider whether to log total execution time per
697 test script.
698
699 Analyse and report useful results, such as the slowest n tests.
700
701 Parse the `say now` output as well as `print pir::__time()`.
702
703 =head1 SEE ALSO
704
705 The L<perlperf> module. The L<http://json.org/> site.
706
707 =cut
Something went wrong with that request. Please try again.