Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 671 lines (606 sloc) 27.394 kb
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
1 #!/usr/bin/perl
2
3 # Copyright (C) 2004-2011, The Perl Foundation.
4
5 ## The "make spectest" target tells us how many tests we failed
6 ## (hopefully zero!), but doesn't say how many were actually passed.
7 ## This script runs the spectest tests and summarizes
8 ## passed, failed, todoed, skipped, executed and planned test results.
9 ##
10 ## Usage:
11 ## tools/test_summary [--timing | --view] <implementation> [testlist]
12 ##
13 ## The --timing option enables microsecond timing per test saved
14 ## in docs/test_summary.times.
15 ## The --view option renders docs/test_summary.times in various reports
16 ## If supplied, C<testlist> identifies an alternate list of tests
17 ## to use (e.g., t/localtest.data).
18
7f5a8bb Will Coleda Note new rakudo requirement
coke authored
19 ## Rakudo Note:
20 ## Using this script with rakudo requires setting the PERL6LIB env var
21 ## to point to its lib directory so we can find Test.pm
22
bdf46c2 Will Coleda Copy tools/test_summary.pl from rakudo
coke authored
use strict;
use warnings;
use Time::Local;
use Time::HiRes;
use Getopt::Long;

# Command-line options:
#   --timing  log per-test microsecond timings (see benchmarking docs below)
#   --view    report on previously logged timings, then exit
my $timing;
my $view;
unless (GetOptions('timing' => \$timing, 'view' => \$view)) {
    die "$0 cannot handle the unknown option\n";
}
if ($view) { Simple::Relative::Benchmarking::view(); exit(0); }

my $benchmark;
# Comment out the next line to skip benchmarking; see docs below
$benchmark = Simple::Relative::Benchmarking::begin() if $timing;

# Which implementation are we running?
my $implementation = $ARGV[0] ||
    die "Must specify an implementation";
43
# Build the list of test scripts to run in @tfiles
my $testlist = $ARGV[1] || 't/spectest.data';
my $fh;
open($fh, '<', $testlist) || die "Can't read $testlist: $!";
my (@tfiles, %tname); # @tfiles lists all test file names before fudging
while (<$fh>) {
    /^ *#/ && next;                 # skip comment lines
    my ($specfile) = split ' ', $_; # first whitespace-separated field
    next unless $specfile;          # skip blank lines
    push @tfiles, "t/spec/$specfile";
}
close $fh or die $!;
3b41fee Will Coleda remove rakudo-specific fossils
coke authored
# Fudge any implementation specific tests by running the fudgeall script
{
    my $cmd = join ' ', $^X, 't/spec/fudgeall', $implementation, @tfiles;
    # Fudgeall prints the name of each test script, but changes the name
    # ending to match the implementation instead of .t if tests were fudged.
    print "$cmd\n";
    @tfiles = split ' ', `$cmd`; # execute fudgeall, collect test names
}
65
# Put test names in %tname, with the 't/spec/' removed from the start
# and truncated to 49 characters. Keep track of the maximum name length
# so the progress display columns can be aligned.
@tfiles = sort @tfiles;
my $max = 0;
for my $tfile (@tfiles) {
    my $tname = $tfile;
    $tname =~ s{^t/spec/}{};
    $tname = substr($tname, 0, 49);
    if (length($tname) > $max) {
        $max = length($tname);
    }
    $tname{$tfile} = $tname;
}
79
# Prepare arrays and hashes to gather and accumulate test statistics
my @col = qw(pass fail todo skip plan spec);
my @syn = qw(S01 S02 S03 S04 S05 S06 S07 S09 S10 S11 S12 S13 S14 S16 S17 S19 S24 S26 S28 S29 S32 int);
my %syn; # number of test scripts per Synopsis
my %sum; # total pass/fail/todo/skip/test/plan per Synopsis
my $syn;
for $syn (@syn) {
    $syn{$syn} = 0;
    for my $col (@col) {
        $sum{"$syn-$col"} = 0;
    }
}
$syn = ''; # to reliably trigger the display of column headings
93
# Execute all test scripts, aggregate the results, display the failures
$| = 1;
my ( @fail, @plan_hint );
my %plan_per_file;
for my $tfile (@tfiles) {
    my $th;
    open($th, '<', $tfile) || die "Can't read $tfile: $!\n";
    my ($pass,$fail,$todo,$skip,$plan,$abort,$bonus) = (0,0,0,0,0,0,0);
    my $no_plan = 0; # planless works, but is unhelpful for statistics
    # http://www.shadowcat.co.uk/blog/matt-s-trout/a-cunning-no_plan/
    while (<$th>) { # extract the number of tests planned
        if    (/^\s*plan\D*(\d+)/) { $plan = $1; last; }
        elsif (/^\s*plan\s+\*;/)   { $no_plan = 1; last; }
    }
    close $th or die $!;
    my $tname = $tname{$tfile};
    # Repeat the column headings at the start of each Synopsis
    if ( $syn ne substr($tname, 0, 3) ) {
        $syn = substr($tname, 0, 3);
        printf( "%s pass fail todo skip plan\n", ' ' x $max );
        unless ( exists $syn{$syn} ) {
            push @fail, "note: test_summary.pl \@syn does not have $syn";
        }
    }
    $syn{$syn}++;
    printf "%s%s..", $tname, '.' x ($max - length($tname));
    my $cmd = "./perl6 $tfile";
    # Run the test, collecting all stdout in @results
    my @results = split "\n", qx{$cmd};
    my (%skip, %todopass, %todofail);
    my ( $testnumber, $test_comment ) = ( 0, '' );
    my @times = (); my @comments = ();
    for (@results) {
        # Pass over the optional line containing "1..$planned"
        if (/^1\.\.(\d+)/) { $plan = $1 if $1 > 0; next; }
        # Handle lines containing test times (emitted when PERL6_TEST_TIMES
        # is set by the benchmarking code)
        if (/^# t=(\d+)/) {
            my $microseconds = $1;
            if ( $testnumber > 0 ) {
                # Do this only if the time was after a test result
                $times[ $testnumber] = $microseconds;
                $comments[$testnumber] = $test_comment;
                $testnumber = 0; # must see another "ok $n" first
            }
            next;
        }
        # Ignore lines not beginning with "ok $test" or "not ok $test"
        next unless /^(not )?ok +(\d+)/;
        if (/#\s*SKIP\s*(.*)/i) { $skip++; $skip{$1}++; }
        elsif (/#\s*TODO\s*(.*)/i) { $todo++;
            my $reason = $1;
            if (/^ok /) { $todopass{$reason}++ }
            else        { $todofail{$reason}++ }
        }
        elsif (/^not ok +(.*)/) { $fail++; push @fail, "$tname $1"; }
        elsif (/^ok +(\d+) - (.*)$/) {
            $pass++; $testnumber = $1; $test_comment = $2;
        }
        elsif (/^ok +(\d+)$/) {
            # Tests which have no description still have passed
            $pass++; $testnumber = $1; $test_comment = "";
        }
    }
    # Reconcile the counted results against the plan
    my $test = $pass + $fail + $todo + $skip;
    if ($plan > $test) {
        $abort = $plan - $test;
        $fail += $abort;
        push @fail, "$tname aborted $abort test(s)";
    }
    elsif ($plan < $test) {
        $bonus = $test - $plan;
        push @fail, "$tname passed $bonus unplanned test(s)";
    }
    if ($no_plan) {
        push @plan_hint, "'plan *;' could become 'plan $plan;' in $tname";
    }
    printf "%4d %4d %4d %4d %4d\n",
        $pass, $fail, $todo, $skip, $plan;
    $sum{'pass'} += $pass; $sum{"$syn-pass"} += $pass;
    $sum{'fail'} += $fail; $sum{"$syn-fail"} += $fail;
    $sum{'todo'} += $todo; $sum{"$syn-todo"} += $todo;
    $sum{'skip'} += $skip; $sum{"$syn-skip"} += $skip;
    $sum{'plan'} += $plan; $sum{"$syn-plan"} += $plan;
    {
        # Record the plan under the unfudged (.t) name for the later
        # per-Synopsis plan totals, which grep the unfudged files.
        my $f = $tfile;
        $f =~ s/\.$implementation$/.t/;
        $plan_per_file{$f} = $plan;
    }
    for (keys %skip) {
        printf " %3d skipped: %s\n", $skip{$_}, $_;
    }
    for (keys %todofail) {
        printf " %3d todo : %s\n", $todofail{$_}, $_;
    }
    for (keys %todopass) {
        printf " %3d todo PASSED: %s\n", $todopass{$_}, $_;
    }
    if ($abort) {
        printf " %3d tests aborted (missing ok/not ok)\n", $abort;
    }
    if ($bonus) {
        printf " %3d tests more than planned were run\n", $bonus;
    }
    defined $benchmark && $benchmark->log_script_times($tfile,\@times,\@comments);
} # for my $tfile (@tfiles)
defined $benchmark && $benchmark->end(); # finish simple relative benchmarking
199
# Calculate plan totals from test scripts grouped by Synopsis and overall.
# This ignores any test list and processes all unfudged files in t/spec/.
# Implementing 'no_plan' or 'plan *' in test scripts makes this total
# inaccurate.
for my $syn (sort keys %syn) {
    # Recurse, always say filename, include line number for troubleshooting
    my $grepcmd = "grep ^plan t/spec/$syn*/* -rHn";
    my @grep_output = `$grepcmd`; # gets an array of all the plan lines
    my $total_tests_planned_per_synopsis = 0;
    for (@grep_output) {
        # Most test scripts have a conventional 'plan 42;' or so near
        # the beginning which is what we need. Unfortunately some have
        # 'plan $x*$y;' or so, which we cannot dynamically figure out.

        # Example grep output: t/spec/S02-names/our.t:4:plan 10;
        # Extract the filename and plan count from that if possible.
        if ( m/ ^ ([^:]*) : \d+ : plan (.*) $ /x ) {
            my ( $filename, $planexpression ) = ( $1, $2 );
            my $script_planned_tests = 0;
            if ( $filename =~ m/\.t$/ ) {
                if ( $planexpression =~ m/ ^ \s* (\d+) \s* ; $ /x ) {
                    # A conventional 'plan 42;' type of line
                    $script_planned_tests = $1;
                }
                else {
                    # It is some other plan argument, either * or variables.
                    # A workaround is to get the actual number of tests run
                    # from the output and just assume it is the same number,
                    # but sometimes that is missing too.
                    if ( exists $plan_per_file{$filename} ) {
                        $script_planned_tests = $plan_per_file{$filename};
                    }
                }
            }
            $total_tests_planned_per_synopsis += $script_planned_tests;
        }
    }
    $sum{"$syn-spec"} = $total_tests_planned_per_synopsis;
    $sum{'spec'} += $total_tests_planned_per_synopsis;
}
239
# Planless testing (eg 'plan *;') is useless for static analysis, making
# tools jump through hoops to calculate the number of planned tests.
# This part displays hints about the scripts that could easily be edited
# to make life easier on the reporting side.
# A test suite author can follow the hints and write the automatically
# counted number of tests into the test script, changing it back from
# planless to planned.

if (@plan_hint) {
    print "----------------\n";
    foreach (@plan_hint) {
        print " $_\n";
    }
}

# Show test totals grouped by Synopsis, followed by overall totals
print "----------------\n";
my $sumfmt = qq(%-11.11s %6s,%6s,%6s,%6s,%6s,%6s\n);
printf $sumfmt, qq{"Synopsis",}, map { qq{"$_"} } @col;
for my $syn (sort keys %syn) {
    printf $sumfmt, qq{"$syn",}, map { $sum{"$syn-$_"} } @col;
}
printf $sumfmt, qq{"total",}, map { $sum{$_} } @col;
print "----------------\n";

# Optionally show the statistics that can be manually appended to
# docs/spectest-progress.csv
if ($ENV{'REV'}) {
    my $filecount = scalar(@tfiles);
    my $passpercent = 100 * $sum{'pass'} / $sum{'spec'};
    print join(',', $ENV{'REV'}, (map { $sum{$_} } @col),
        $filecount), "\n";
    printf "spectest-progress.csv update: " .
        "%d files, %d (%.1f%% of %d) pass, %d fail\n",
        $filecount, $sum{'pass'}, $passpercent, $sum{'spec'}, $sum{'fail'};
}

# List descriptions of the tests that failed
if (@fail) {
    print "Failure summary:\n";
    foreach (@fail) {
        print "$_\n";
    }
}
else {
    print "No failures!\n";
}
291
# End of main program

#-------------------- Simple Relative Benchmarking ---------------------

package Simple::Relative::Benchmarking;

# begin
# Initialize simple relative benchmarking; called once before the first
# test script runs (acts as the constructor). Reads the commit history
# lines from any existing docs/test_summary.times, opens a temporary
# output file, and returns the blessed logger object.
# Accepts an optional parameter: the number of timings to keep per test
# (default 5).
sub begin {
    my $timings = shift || 5; # number of timings to keep (default 5)
    my $self = {};
    my @test_history;
    $self->{'Timings'} = $timings;
    $self->{'Last_test_loaded'} = '';

    # Carry forward the "test_history" lines of the previous results file,
    # leaving the input handle positioned at the per-test timing records.
    if ( open( $self->{'file_in'}, '<', 'docs/test_summary.times') ) {
        my $file_in = $self->{'file_in'};
        my $line = <$file_in>; chomp $line;
        if ( $line =~ m/{"test_.+":\[/i ) { # should be Test_history
            $line = <$file_in>; chomp $line;
            while ( $line =~ m/\s\s(.+\d\d\d\d-\d\d-\d\d.\d\d:\d\d:\d\d.+)/ ) {
                my $history_line = $1;
                $history_line =~ s/,$//; # trim possible trailing comma
                push @test_history, $history_line;
                $line = <$file_in>; chomp $line;
            } # ends on the ' ],' line after the test_history
            $line = <$file_in>; chomp $line;
            # if ( $line =~ m/ "test_microseconds":{/i ) {
            #     warn "begin reached 'test_microseconds'\n";
            # }
        }
    }
    open( $self->{'file_out'}, '>', 'docs/test_summary.times.tmp') or die "cannot create docs/test_summary.times.tmp: $!";
    my $parrot_version = qx{./perl6 -e'print \$*VM<config><revision>'};
    my $impl_version = qx{git log --pretty=oneline --abbrev-commit --max-count=1 .}; chomp $impl_version;
    $impl_version =~ s/^([0-9a-f])+\.\.\./$1/; # delete possible ...
    $impl_version =~ s/\\/\\\\/g; # escape all backslashes
    $impl_version =~ s/\"/\\\"/g; # escape all double quotes
    my $file_out = $self->{'file_out'};
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(time());
    push @test_history, sprintf("[\"%4d-%02d-%02d %02d:%02d:%02d\",%d,\"%s\"]",
        $year+1900, $mon+1, $mday, $hour, $min, $sec,
        $parrot_version, $impl_version );
    # Delete the oldest test history entries if there are too many.
    while ( @test_history > $self->{'Timings'} ) { shift @test_history; }
    # Two-space indentation matches the \s\s history-line parser above.
    print $file_out qq!{"test_history":[\n!;
    print $file_out "  " . join(",\n  ",@test_history) . "\n ],\n";
    print $file_out qq! "test_microseconds":{!;
    # tell Test.pm to output per-test timestamps
    $ENV{'PERL6_TEST_TIMES'} = 'true';
    return bless $self;
}
344
# Track simple relative benchmarking. Called after running each test script.
# Parameters: test script name, reference to an array of times in
# microseconds, reference to an array of test description strings.
# Merges the new times with up to Timings-1 previous times per test and
# appends the result to the temporary log file.
sub log_script_times {
    my $self = shift;
    my $test_name = shift;
    my $ref_times = shift;
    my $ref_comments = shift;
    # Make local arrays of the execution times in microseconds, and test
    # comments (or descriptions). Since tests are being added to and
    # removed from the test suite, test numbers change over time. The
    # comments are sometimes empty or duplicated, but they are the only
    # way to correlate test results if the test suite is edited.
    my (@times) = @$ref_times;
    my (@comments) = @$ref_comments;
    shift @times; # offset by 1: the first result becomes $times[0];
    shift @comments;
    # Was '$i<=@times', which wrote one element past the end of @comments.
    for ( my $i=0; $i<@times; $i++ ) {
        if ( not defined $comments[$i] ) { $comments[$i] = ''; }
        $comments[$i] =~ s/\\/\\\\/g; # escape all backslashes
        $comments[$i] =~ s/\"/\\\"/g; # escape all double quotes
    }
    my ( $line );
    my $file_in = $self->{'file_in'};
    my $file_out = $self->{'file_out'};
    $test_name =~ s{^t/spec/}{}; # eg 'S02-literals/numeric.t'
    # The first script after the "test_microseconds":{ header needs no
    # separating comma; every later script does.
    my $test_separator;
    if ( $self->{'Last_test_loaded'} eq '' ) {
        $test_separator = "\n";
    }
    else {
        $test_separator = ",\n";
    }
    # Skip forward in the previous results file to this script's record,
    # if present (script names are written in sorted order).
    while ( not eof($file_in) and $self->{'Last_test_loaded'} lt $test_name ) {
        $line = <$file_in>; chomp $line;
        if ( $line =~ m/^\s\s"(.+)":.$/ ) {
            $self->{'Last_test_loaded'} = $1;
        }
    }
    # Collect the previously logged times for this script, indexed by
    # test number - 1.
    my @logged_results;
    if ( not eof($file_in) and $self->{'Last_test_loaded'} eq $test_name ) {
        my $line = <$file_in>; chomp $line;
        while ( not eof($file_in) and $line =~ m/^\s\s\s\[(\d+),\[(.+?)\],?/ ) {
            my $test_number = $1;
            my @timings = split /,/ , $2;
            $logged_results[$test_number-1] = [ @timings ];
            $line = <$file_in>; chomp $line;
        }
    }
    my $microseconds = [];
    my $testcount = @times;
    for ( my $test_number=0; $test_number<$testcount; $test_number++) {
        unless ( defined($times[$test_number]) ) { $times[$test_number] = 0; }
        my ( @times_in_file );
        # Was 'defined @{...}', a fatal error in Perl 5.22+; the element
        # is either undef or an (always true) array reference.
        if ( $logged_results[$test_number] ) {
            @times_in_file = ( @{$logged_results[$test_number]} );
        }
        push @times_in_file, $times[$test_number];
        if ( not defined( $times_in_file[0] ) ) { shift @times_in_file; }
        # Delete the oldest test timings if there are too many.
        while ( @times_in_file > $self->{'Timings'} ) { shift @times_in_file; }
        $$microseconds[$test_number] = [ @times_in_file ];
    }
    my $test_number = 1; # start from number 1 again
    # Indentation (two spaces for the name line, three for each test line)
    # matches the ^\s\s" and ^\s\s\s\[ parser regexes above and in view().
    print $file_out
        $test_separator .
        qq'  "$test_name":[\n' .
        join(",\n", map {'   ['.$test_number++.',['.join(',',@$_).'],"'.$comments[$test_number-2].'"]'} @$microseconds) .
        qq'\n  ]';
}
413
# Finish simple relative benchmarking. Called once after the last test
# script: closes the JSON structure, closes both files, and replaces the
# old log with the newly written temporary file.
sub end {
    my $self = shift;
    my $file_in = $self->{'file_in'};
    my $file_out = $self->{'file_out'};
    print $file_out "\n }\n}\n";
    close $file_out or warn $!;
    close $file_in or warn $!;
    unlink 'docs/test_summary.times';
    rename 'docs/test_summary.times.tmp', 'docs/test_summary.times';
}
425
# Report on simple relative benchmarking. Does the --view option.
# Interactively re-reads docs/test_summary.times, sorts the per-test
# timings by the user's chosen order, and pages the report through less.
sub view
{
    my $choice = '1'; my $choiceA; my $choiceB;
    do {
        my ($input, $output, $t, @timings, $script, @z, @sorted, @runs);
        my @ordername = ('', 'sorted by time', 'sorted by %change', 'sorted by time change', 'in test order' );
        open($input, '<', 'docs/test_summary.times') or die "$0 cannot open docs/test_summary.times\n";
        while (<$input>) { # custom parser to avoid dependency on JSON.pm
            # a commit identification line
            if (/^\s\s\[\"([^"]*)\",\d+,\"([^"]*)\"/) { push @runs, { 'time'=>$1, 'comment'=>$2 }; }
            # test script name
            if (/^\s\s\"(.+)\":\[$/x) { $script = $1; }
            # individual test times; the trailing comma is optional so the
            # last test of each script (which has none) is not dropped
            if (/^\s\s\s\[(\d+),\[([0-9,]+)\],\"(.*)\"\],?/x) {
                unless (defined $choiceA) { $choiceB = $#runs; $choiceA = $choiceB-1; }
                my $testnumber = $1;
                my @times = split /,/, $2;
                push @times, 0 while @times < 5; # pad sparse records
                my $testcomment = $3;
                # keep only tests timed in both of the chosen commits
                if ($times[$choiceA] > 0 && $times[$choiceB] > 0) {
                    push @timings, [ [@times], $testcomment, $testnumber, $script];
                }
            }
        }
        close($input);
        @z=(); # Prepare to sort using a Schwartzian transform
        if ($choice eq '1') { # by execution time
            for my $t ( @timings ) { push @z, $$t[0][$choiceB]; }
        }
        elsif ($choice eq '2') { # by relative speedup/slowdown
            for my $t ( @timings ) { push @z, ($$t[0][$choiceB]-$$t[0][$choiceA])/$$t[0][$choiceA]; }
        }
        elsif ($choice eq '3') { # by absolute speedup/slowdown
            for my $t ( @timings ) { push @z, ($$t[0][$choiceB]-$$t[0][$choiceA]); }
        }
        else {
            @sorted = @timings; # choice '4' is unsorted, meaning in order of execution
        }
        @sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ] if @z;
        # Send the results to 'less' for viewing
        open $output, ">", "/tmp/test_summary.$$" or die "$0 cannot output to 'less'\n";
        print $output "Microseconds and relative change of spec tests $ordername[$choice]. Commits:\n";
        print $output "A: $runs[$choiceA]{'time'} $runs[$choiceA]{'comment'}\n";
        print $output "B: $runs[$choiceB]{'time'} $runs[$choiceB]{'comment'}\n";
        print $output " A B Chg Test description (script#test)\n";
        for $t (@sorted) {
            printf $output "%6d %5d %+3.0f%% %s (%s#%d)\n", $$t[0][$choiceA], $$t[0][$choiceB],
                ($$t[0][$choiceB]-$$t[0][$choiceA])*100/$$t[0][$choiceA], $$t[1], $$t[3], $$t[2];
        }
        close $output;
        system "less --chop-long-lines /tmp/test_summary.$$";
        do { # Prompt for user choice of sort order or commits
            print 'view: sort by 1)time 2)%change 3)change 4)none, other 5)commits q)uit> ';
            $choice = <STDIN>; chomp $choice;
            if ($choice eq '5') { # choose a commit
                for (my $r=0; $r<@runs; ++$r) {
                    print "$r: $runs[$r]{'time'} $runs[$r]{'comment'}\n";
                }
                print 'commit for column A: ';
                $choiceA = <STDIN>; chomp $choiceA;
                print 'commit for column B: ';
                $choiceB = <STDIN>; chomp $choiceB;
            }
        } while index('5', $choice) >= 0; # if user chose commits, must still choose sort order
    } while index('1234', $choice) >= 0; # if valid sort order (not 'q') then do another report
}

package main;
495
496 =pod
497
498 =head1 NAME
499
500 tools/test_summary.pl -- run spectests and make statistical reports
501
502 =head1 DESCRIPTION
503
504 This test harness written in Perl 5, runs the Perl 6 specification test
505 suite. It uses the same Test Anything Protocol (TAP) as for example
506 L<TAP::Harness>, but does not depend on those modules.
507
508 The names of the tests are listed in t/spectest.data, or another file
509 whose name is passed on the command line.
510
511 =head2 OUTPUT
512
513 The harness prints the name of each test script before running it.
514 After completion it prints the total number of tests passed, failed,
515 to do, skipped, and planned. The descriptions of any tests failed,
516 skipped or left to do are also listed.
517
518 After running all the tests listed, the harness prints a set of
519 subtotals per Synopsis.
520
521 If you set the REV environment variable (with the first 7 characters of
522 a git commit id), the harness prints an additional set of grand
523 totals suitable for adding to F<docs/spectest_progress.csv>.
524
525 =head1 SIMPLE RELATIVE BENCHMARKING
526
527 Too little information can mislead, hence this self deprecating title.
528 For example, these measurements overlook variation in test times
529 ('jitter'), kernel versus user process times, and measurement overheads.
530 But these results are better than no information at all.
531
532 If activated, this tool logs the most recent 5 timings in microseconds
533 in F<docs/test_summary.times> in a specific JSON format, for later
534 analysis. Measurement and logging add less than 2% to the testing time
535 and makes a log file of about 2.5MB.
536
537 =head2 Methods
538
539 =head3 begin
540
541 Accepts an optional parameter, the number of timings per test to keep.
542 Creates a temporary file for new results, and returns an object that
543 updates the log file. (F<begin> acts as the constructor).
544
545 =head3 log_script_times
546
547 Takes these parameters: test script name, reference to an array of times
548 in microseconds, reference to an array of test description strings.
549 Appends the results to the temporary log file.
550
551 =head3 end
552
553 Closes and renames the temporary log file.
554
555 =head2 Timing results file
556
557 All results are stored in F<docs/test_summary.times> in a specific JSON
558 format. With 35000 test result lines and 5 runs it occupies just under
559 2.5 MB.
560
561 Here is an example with a few semi fictitious results:
562
563 {"test_history":[
564 ["2010-05-05 10:15:45",46276,"925629d Make $x does (R1, R2) work."],
565 ["2010-05-07 08:58:07",46276,"5713af2 run two more test files"],
566 ["2010-05-08 18:08:43",46405,"ab23221 bump PARROT_REVISION"],
567 ["2010-05-09 05:53:25",46405,"c49d32b run S04-phasers/rvalues.t"],
568 ["2010-05-10 00:44:46",46405,"118f4aa Overhaul sqrt for Numeric / Real."]
569 ],
570 "test_microseconds":{
571 "S02-builtin_data_types/anon_block.rakudo":[
572 [1,[6139,7559,6440,6289,5520],"The object is-a 'Sub()'"],
573 [2,[6610,6599,6690,6580,6010],"sub { } works"]
574 ],
575 "S02-builtin_data_types/array.rakudo":[
576 [1,[9100,8889,9739,9140,9169],"for 1, 2, 3 does 3 iterations"],
577 [2,[5650,5599,6119,9819,5140],"for (1, 2, 3).item 3 iterations"],
578 [3,[3920,3770,4190,4410,3350],"for [1, 2, 3] does one iteration"]
579 ]
580 }
581 }
582
583 The "test_history" section lists the starting times for all the runs of
584 F<tools/test_summary.pl> that are recorded in the file. Then the
585 "test_microseconds" records show each test filename, possibly fudged,
586 followed by the test numbers, followed by the times obtained from each
587 run. If a test has fewer than the usual number of timings, the timings
588 will be from the most recent test runs.
589
590 The file is read and written by custom code and not a JSON module, to
591 reduce dependencies. Altering the file format might cause reading to
592 fail and could result in data loss. General purpose JSON parsers should
593 be able to read the data. For example the following ranks the tests
594 from best speedup to worst slowdown.
595
596 #!/usr/bin/perl
597 use File::Slurp qw( slurp );
598 use JSON;
599 my $log_text = slurp('docs/test_summary.times');
600 my $log = JSON->new->decode( $log_text );
601 # Flatten the data structure to a 2-D array of nonzero test times
602 my @timings;
603 my $script_hash = $$log{'test_microseconds'};
604 for my $script_name ( sort keys %$script_hash ) {
605 my $test_list = $$script_hash{$script_name};
606 for my $t ( @$test_list ) {
607 my $times_count = @{$$t[1]};
608 if ( $times_count >= 2 and ${$$t[1]}[$times_count-1] > 0 ) {
609 push @timings, [$script_name, $$t[0], $$t[2], ${$$t[1]}[$times_count-2], ${$$t[1]}[$times_count-1] ];
610 }
611 }
612 }
613 # Sort the timings into improved/worsened order with a Schwartzian transform
614 my @z; for my $t ( @timings ) { push @z, ($$t[4]-$$t[3])/$$t[4]; }
615 my @sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ];
616 # Display the results: quicker is minus, slower is plus.
617 for my $s ( @sorted ) {
618 printf "%+3.0f%% %6d %6d %s:%d:%s\n",
619 ($$s[4]-$$s[3])*100/$$s[3], $$s[3], $$s[4], $$s[0], $$s[1], $$s[2];
620 } # %change, prev-time, latest-time, script, test-num, test-desc
621
622 A second example shows another way to read the results file, and ranks
623 the tests from most to least consistent in execution time.
624
625 #!/usr/bin/perl
626 use JSON;
627 my $log_text = qx{$^X -MExtUtils::Command -e cat docs/test_summary.times};
628 my $log = JSON->new->decode( $log_text );
629 # Flatten the data structure to a 2-D array of nonzero test times
630 my @timings;
631 my $script_hash = $$log{'test_microseconds'};
632 for my $script_name ( sort keys %$script_hash ) {
633 my $test_list = $$script_hash{$script_name};
634 for my $t ( @$test_list ) {
635 my $times_count = @{$$t[1]};
636 if ( $times_count >= 2 and ${$$t[1]}[$times_count-1] > 0 ) {
637 my $min = my $max = ${$$t[1]}[0];
638 for my $i (1..$times_count-1) {
639 $min = ${$$t[1]}[$i] if $min > ${$$t[1]}[$i];
640 $max = ${$$t[1]}[$i] if $max < ${$$t[1]}[$i];
641 }
642 push @timings, [$script_name, $$t[0], $$t[2], $min, $max ] if $min > 0;
643 }
644 }
645 }
646 # Sort the timings into most/least consistent order by Schwartzian transform
647 my @z; for my $t ( @timings ) { push @z, ($$t[4]-$$t[3])/$$t[3]; }
648 my @sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ];
649 # Display the results from most to least consistent
650 for my $s ( @sorted ) {
651 printf "%3.1f%% %6d %6d %s:%d:%s\n",
652 ($$s[4]-$$s[3])*100/$$s[3], $$s[3], $$s[4], $$s[0], $$s[1], $$s[2];
653 } # %difference, min-time, max-time, script, test-num, test-desc
654
655 =head2 TODO
656
657 Detect changes in number of tests or descriptions of tests in each
658 test script, and discard all previous results for that script if there
659 has been a change. Consider whether to log total execution time per
660 test script.
661
662 Analyse and report useful results, such as the slowest n tests.
663
664 Parse the `say now` output as well as `print pir::__time()`.
665
666 =head1 SEE ALSO
667
668 The L<perlperf> module. The L<http://json.org/> site.
669
670 =cut
Something went wrong with that request. Please try again.