#!/usr/bin/perl

# Copyright (C) 2004-2011, The Perl Foundation.

## The "make spectest" target tells us how many tests we failed
## (hopefully zero!), but doesn't say how many were actually passed.
## This script runs the spectest tests and summarizes
## passed, failed, todoed, skipped, executed and planned test results.
##
## Usage:
## tools/test_summary [--timing | --view] <implementation> [testlist]
##
## The --timing option enables microsecond timing per test saved
## in docs/test_summary.times.
## The --view option renders docs/test_summary.times as various reports.
## If supplied, C<testlist> identifies an alternate list of tests
## to use (e.g., t/localtest.data).
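##
## Example:
##   tools/test_summary --timing rakudo t/localtest.data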

## Rakudo Note:
## Using this script with rakudo requires setting the PERL6LIB env var
## to point to its lib directory so we can find Test.pm

use strict;
use warnings;
use Time::Local;
use Time::HiRes;
use Getopt::Long;

my $timing;
my $view;
unless (GetOptions('timing' => \$timing, 'view' => \$view)) {
    die "$0 cannot handle the unknown option\n";
}
if ($view) { Simple::Relative::Benchmarking::view(); exit(0); }

my $benchmark;
# Comment out the next line to skip benchmarking; see docs below
$benchmark = Simple::Relative::Benchmarking::begin() if $timing;

# Which implementation are we running?
my $implementation = $ARGV[0] ||
    die "Must specify an implementation";

# Build the list of test scripts to run in @tfiles
my $testlist = $ARGV[1] || 't/spectest.data';
my $fh;
open($fh, '<', $testlist) || die "Can't read $testlist: $!";
my (@tfiles, %tname); # @tfiles lists all test file names before fudging
while (<$fh>) {
    /^ *#/ && next;
    my ($specfile) = split ' ', $_;
    next unless $specfile;
    push @tfiles, "t/spec/$specfile";
}
close $fh or die $!;

# Fudge any implementation specific tests by running the fudgeall script
{
    my $cmd = join ' ', $^X, 't/spec/fudgeall', $implementation, @tfiles;
    # Fudgeall prints the name of each test script, but changes the name
    # ending to match the implementation instead of .t if tests were fudged.
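    # (e.g. t/spec/S02-names/our.t becomes t/spec/S02-names/our.rakudo when
    # fudged for rakudo.)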
    print "$cmd\n";
    @tfiles = split ' ', `$cmd`; # execute fudgeall, collect test names
}

# Put test names in %tname, with the 't/spec/' removed from the start
# and truncated to 49 characters. Keep track of the maximum name length.
@tfiles = sort @tfiles;
my $max = 0;
for my $tfile (@tfiles) {
    my $tname = $tfile;
    $tname =~ s{^t/spec/}{};
    $tname = substr($tname, 0, 49);
    if (length($tname) > $max) {
        $max = length($tname);
    }
    $tname{$tfile} = $tname;
}

# Prepare arrays and hashes to gather and accumulate test statistics
my @col = qw(pass fail todo skip plan spec);
my @syn = qw(S01 S02 S03 S04 S05 S06 S07 S09 S10 S11 S12 S13 S14 S16 S17 S19 S24 S26 S28 S29 S32 int);
my %syn; # number of test scripts per Synopsis
my %sum; # total pass/fail/todo/skip/plan/spec per Synopsis
my $syn;
for $syn (@syn) {
    $syn{$syn} = 0;
    for my $col (@col) {
        $sum{"$syn-$col"} = 0;
    }
}
$syn = ''; # to reliably trigger the display of column headings

# Execute all test scripts, aggregate the results, display the failures
$| = 1;
my ( @fail, @plan_hint );
my %plan_per_file;
for my $tfile (@tfiles) {
    my $th;
    open($th, '<', $tfile) || die "Can't read $tfile: $!\n";
    my ($pass,$fail,$todo,$skip,$plan,$abort,$bonus) = (0,0,0,0,0,0,0);
    my $no_plan = 0; # planless works, but is unhelpful for statistics
    # http://www.shadowcat.co.uk/blog/matt-s-trout/a-cunning-no_plan/
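    # Conventional scripts say 'plan 42;' near the top; planless ones say 'plan *;'.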
    while (<$th>) { # extract the number of tests planned
        if (/^\s*plan\D*(\d+)/) { $plan = $1; last; }
        elsif (/^\s*plan\s+\*;/) { $no_plan = 1; last; }
    }
    close $th or die $!;
    my $tname = $tname{$tfile};
    # Repeat the column headings at the start of each Synopsis
    if ( $syn ne substr($tname, 0, 3) ) {
        $syn = substr($tname, 0, 3);
        printf( "%s pass fail todo skip plan\n", ' ' x $max );
        unless ( exists $syn{$syn} ) {
            push @fail, "note: test_summary.pl \@syn does not have $syn";
        }
    }
    $syn{$syn}++;
    printf "%s%s..", $tname, '.' x ($max - length($tname));
    my $cmd = "./perl6 $tfile";
    # Run the test, collecting all stdout in @results
    my @results = split "\n", qx{$cmd};
    my (%skip, %todopass, %todofail);
    my ($time1, $time2, $testnumber, $test_comment ) = ( 0, 0, 0, '' );
    my @times = (); my @comments = ();
    for (@results) {
        # Pass over the optional line containing "1..$planned"
        if (/^1\.\.(\d+)/) { $plan = $1 if $1 > 0; next; }
        # Handle lines containing test times
        if (/^# t=(\d+)/) {
            my $microseconds = $1;
            if ( $testnumber > 0 ) {
                # Do this only if the time was after a test result
                $times[ $testnumber] = $microseconds;
                $comments[$testnumber] = $test_comment;
                $testnumber = 0; # must see another "ok $n" first
            }
            next;
        }
        # Ignore lines not beginning with "ok $test" or "not ok $test"
        next unless /^(not )?ok +(\d+)/;
        if (/#\s*SKIP\s*(.*)/i) { $skip++; $skip{$1}++; }
        elsif (/#\s*TODO\s*(.*)/i) { $todo++;
            my $reason = $1;
            if (/^ok /) { $todopass{$reason}++ }
            else { $todofail{$reason}++ }
        }
        elsif (/^not ok +(.*)/) { $fail++; push @fail, "$tname $1"; }
        elsif (/^ok +(\d+) - (.*)$/) {
            $pass++; $testnumber = $1; $test_comment = $2;
        }
        elsif (/^ok +(\d+)$/) {
            $pass++; $testnumber = $1; $test_comment = "";
        }
    }
    my $test = $pass + $fail + $todo + $skip;
    if ($plan > $test) {
        $abort = $plan - $test;
        $fail += $abort;
        push @fail, "$tname aborted $abort test(s)";
    }
    elsif ($plan < $test) {
        $bonus = $test - $plan;
        push @fail, "$tname passed $bonus unplanned test(s)";
    }
    if ($no_plan) {
        push @plan_hint, "'plan *;' could become 'plan $plan;' in $tname";
    }
    printf "%4d %4d %4d %4d %4d\n",
        $pass, $fail, $todo, $skip, $plan;
    $sum{'pass'} += $pass; $sum{"$syn-pass"} += $pass;
    $sum{'fail'} += $fail; $sum{"$syn-fail"} += $fail;
    $sum{'todo'} += $todo; $sum{"$syn-todo"} += $todo;
    $sum{'skip'} += $skip; $sum{"$syn-skip"} += $skip;
    $sum{'plan'} += $plan; $sum{"$syn-plan"} += $plan;
    {
        my $f = $tfile;
        $f =~ s/\.$implementation$/.t/;
        $plan_per_file{$f} = $plan;
    }
    for (keys %skip) {
        printf " %3d skipped: %s\n", $skip{$_}, $_;
    }
    for (keys %todofail) {
        printf " %3d todo : %s\n", $todofail{$_}, $_;
    }
    for (keys %todopass) {
        printf " %3d todo PASSED: %s\n", $todopass{$_}, $_;
    }
    if ($abort) {
        printf " %3d tests aborted (missing ok/not ok)\n", $abort;
    }
    if ($bonus) {
        printf " %3d tests more than planned were run\n", $bonus;
    }
    defined $benchmark && $benchmark->log_script_times($tfile,\@times,\@comments);
} # for my $tfile (@tfiles)
defined $benchmark && $benchmark->end(); # finish simple relative benchmarking

# Calculate plan totals from test scripts grouped by Synopsis and overall.
# This ignores any test list and processes all unfudged files in t/spec/.
# Implementing 'no_plan' or 'plan *' in test scripts makes this total
# inaccurate.
for my $syn (sort keys %syn) {
    my $grepcmd = "grep ^plan t/spec/$syn*/* -rHn"; # recurse, always say filename, include line number for troubleshooting
    my @grep_output = `$grepcmd`; # gets an array of all the plan lines
    my $total_tests_planned_per_synopsis = 0;
    for (@grep_output) {
        # Most test scripts have a conventional 'plan 42;' or so near
        # the beginning which is what we need. Unfortunately some have
        # 'plan $x*$y;' or so, which we cannot dynamically figure out.

        # Example grep output: t/spec/S02-names/our.t:4:plan 10;
        # Extract the filename and plan count from that if possible.
        if ( m/ ^ ([^:]*) : \d+ : plan (.*) $ /x ) {
            my ( $filename, $planexpression ) = ( $1, $2 );
            my $script_planned_tests = 0;
            if ( $filename =~ m/\.t$/ ) {
                if ( $planexpression =~ m/ ^ \s* (\d+) \s* ; $ /x ) {
                    # A conventional 'plan 42;' type of line
                    $script_planned_tests = $1;
                }
                else {
                    # It is some other plan argument, either * or variables.
                    # A workaround is to get the actual number of tests run
                    # from the output and just assume it is the same number,
                    # but sometimes that is missing too.
                    if ( exists $plan_per_file{$filename} ) {
                        $script_planned_tests = $plan_per_file{$filename};
                    }
                }
            }
            $total_tests_planned_per_synopsis += $script_planned_tests;
        }
    }
    $sum{"$syn-spec"} = $total_tests_planned_per_synopsis;
    $sum{'spec'} += $total_tests_planned_per_synopsis;
}

# Planless testing (e.g. 'plan *;') is useless for static analysis, making
# tools jump through hoops to calculate the number of planned tests.
# This part displays hints about the scripts that could easily be edited
# to make life easier on the reporting side.
# A test suite author can follow the hints and write the automatically
# counted number of tests into the test script, changing it back from
# planless to planned.

if (@plan_hint) {
    print "----------------\n";
    foreach (@plan_hint) {
        print " $_\n";
    }
}

# Show test totals grouped by Synopsis, followed by overall totals
print "----------------\n";
my $sumfmt = qq(%-11.11s %6s,%6s,%6s,%6s,%6s,%6s\n);
printf $sumfmt, qq{"Synopsis",}, map { qq{"$_"} } @col;
for my $syn (sort keys %syn) {
    printf $sumfmt, qq{"$syn",}, map { $sum{"$syn-$_"} } @col;
}
my $total = scalar(@tfiles).' regression files';
printf $sumfmt, qq{"total",}, map { $sum{$_} } @col;
print "----------------\n";

# Optionally show the statistics that can be manually appended to
# docs/spectest-progress.csv
if ($ENV{'REV'}) {
    my @gmt = gmtime;
    my $testdate = sprintf '"%4d-%02d-%02d %02d:%02d"', $gmt[5]+1900,
        $gmt[4]+1, $gmt[3], $gmt[2], $gmt[1];
    my $filecount = scalar(@tfiles);
    my $passpercent = 100 * $sum{'pass'} / $sum{'spec'};
    print join(',', $ENV{'REV'}, (map { $sum{$_} } @col),
        $filecount), "\n";
    printf "spectest-progress.csv update: " .
        "%d files, %d (%.1f%% of %d) pass, %d fail\n",
        $filecount, $sum{'pass'}, $passpercent, $sum{'spec'}, $sum{'fail'};
}

# List descriptions of the tests that failed
if (@fail) {
    print "Failure summary:\n";
    foreach (@fail) {
        print "$_\n";
    }
}
else {
    print "No failures!\n";
}

# End of main program

#-------------------- Simple Relative Benchmarking ---------------------

package Simple::Relative::Benchmarking;

# Initialize simple relative benchmarking. Called once, before the first
# test script is run.
sub begin {
    my $timings = shift || 5; # number of timings to keep (default 5)
    my $self = {};
    my @test_history;
    $self->{'Timings'} = $timings;
    $self->{'Last_test_loaded'} = '';

    if ( open( $self->{'file_in'}, '<', 'docs/test_summary.times') ) {
        my $file_in = $self->{'file_in'};
        my $line = <$file_in>; chomp $line;
        if ( $line =~ m/{"test_.+":\[/i ) { # should be Test_history
            $line = <$file_in>; chomp $line;
            while ( $line =~ m/\s\s(.+\d\d\d\d-\d\d-\d\d.\d\d:\d\d:\d\d.+)/ ) {
                my $history_line = $1;
                $history_line =~ s/,$//; # trim possible trailing comma
                push @test_history, $history_line;
                $line = <$file_in>; chomp $line;
            } # ends on the ' ],' line after the test_history
            $line = <$file_in>; chomp $line;
            # if ( $line =~ m/ "test_microseconds":{/i ) {
            # warn "begin reached 'test_microseconds'\n";
            # }
        }
    }
    open( $self->{'file_out'}, '>', 'docs/test_summary.times.tmp') or die "cannot create docs/test_summary.times.tmp: $!";
    my $parrot_version = qx{./perl6 -e'print \$*VM<config><revision>'};
    my $impl_version = qx{git log --pretty=oneline --abbrev-commit --max-count=1 .}; chomp $impl_version;
    $impl_version =~ s/^([0-9a-f]+)\.\.\./$1/; # delete a possible trailing '...'
    $impl_version =~ s/\\/\\\\/g; # escape all backslashes
    $impl_version =~ s/\"/\\\"/g; # escape all double quotes
    my $file_out = $self->{'file_out'};
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(time());
    push @test_history, sprintf("[\"%4d-%02d-%02d %02d:%02d:%02d\",%d,\"%s\"]",
        $year+1900, $mon+1, $mday, $hour, $min, $sec,
        $parrot_version, $impl_version );
    # Delete the oldest test_history entries if there are too many.
    while ( @test_history > $self->{'Timings'} ) { shift @test_history; }
    print $file_out qq!{"test_history":[\n!;
    print $file_out " " . join(",\n ",@test_history) . "\n ],\n";
    print $file_out qq! "test_microseconds":{!;
    # tell Test.pm to output per-test timestamps
    $ENV{'PERL6_TEST_TIMES'} = 'true';
    return bless $self;
}

# Track simple relative benchmarking. Called after running each test script.
sub log_script_times {
    my $self = shift;
    my $test_name = shift;
    my $ref_times = shift;
    my $ref_comments = shift;
    # Make local arrays of the execution times in microseconds, and test
    # comments (or descriptions). Since tests are being added to and
    # removed from the test suite, test numbers change over time. The
    # comments are sometimes empty or duplicated, but they are the only
    # way to correlate test results if the test suite is edited.
    my (@times) = @$ref_times;
    my (@comments) = @$ref_comments;
    shift @times; # offset by 1: the first result becomes $times[0];
    shift @comments;
    for ( my $i=0; $i<@times; $i++ ) { # valid indexes are 0..$#times
        if ( not defined $comments[$i] ) { $comments[$i] = ''; }
        $comments[$i] =~ s/\\/\\\\/g; # escape all backslashes
        $comments[$i] =~ s/\"/\\\"/g; # escape all double quotes
    }
    my ( $line );
    my $file_in = $self->{'file_in'};
    my $file_out = $self->{'file_out'};
    $test_name =~ s{^t/spec/}{}; # eg 'S02-literals/numeric.t'
    my $test_separator;
    if ( $self->{'Last_test_loaded'} eq '' ) {
        $test_separator = "\n";
    }
    else {
        $test_separator = ",\n";
    }
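    # Both @tfiles and the entries in the old log file are in sorted order,
    # so skip forward in the old log until this script's entry (or a later
    # one) is reached.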
    while ( not eof($file_in) and $self->{'Last_test_loaded'} lt $test_name ) {
        $line = <$file_in>; chomp $line;
        if ( $line =~ m/^\s\s"(.+)":.$/ ) {
            $self->{'Last_test_loaded'} = $1;
        }
    }
    my @logged_results;
    if ( not eof($file_in) and $self->{'Last_test_loaded'} eq $test_name ) {
        my $line = <$file_in>; chomp $line;
        while ( not eof($file_in) and $line =~ m/^\s\s\s\[(\d+),\[(.+?)\],?/ ) {
            my $test_number = $1;
            my @timings = split /,/ , $2;
            $logged_results[$test_number-1] = [ @timings ];
            $line = <$file_in>; chomp $line;
        }
    }
    my $microseconds = [];
    my $testcount = @times;
    for ( my $test_number=0; $test_number<$testcount; $test_number++) {
        unless ( defined($times[$test_number]) ) { $times[$test_number] = 0; }
        my ( @times_in_file );
        if ( defined $logged_results[$test_number] ) {
            @times_in_file = ( @{$logged_results[$test_number]} );
        }
        push @times_in_file, $times[$test_number];
        if ( not defined( $times_in_file[0] ) ) { shift @times_in_file; }
        # Delete the oldest test timings if there are too many.
        while ( @times_in_file > $self->{'Timings'} ) { shift @times_in_file; }
        $$microseconds[$test_number] = [ @times_in_file ];
    }
    my $test_number = 1; # start from number 1 again
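    # Write one log line per test: [number,[times...],"comment"]. The map
    # post-increments $test_number, so by the time $comments[$test_number-2]
    # is evaluated it indexes the 0-based comment for the row just numbered.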
    print $file_out
        $test_separator .
        qq' "$test_name":[\n' .
        join(",\n", map {' ['.$test_number++.',['.join(',',@$_).'],"'.$comments[$test_number-2].'"]'} @$microseconds) .
        qq'\n ]';
}

# Finish simple relative benchmarking. Called after the last test script.
sub end {
    my $self = shift;
    my $file_in = $self->{'file_in'};
    my $file_out = $self->{'file_out'};
    print $file_out "\n }\n}\n";
    close $file_out or warn $!;
    close $file_in or warn $!;
    unlink 'docs/test_summary.times';
    rename 'docs/test_summary.times.tmp', 'docs/test_summary.times';
}

# Report on simple relative benchmarking. Implements the --view option.
sub view {
    my $choice = '1'; my $choiceA; my $choiceB;
    do {
        my ($input, $output, $t, @timings, $script, @z, @sorted, @runs);
        my @ordername = ('', 'sorted by time', 'sorted by %change', 'sorted by time change', 'in test order' );
        open($input, '<', 'docs/test_summary.times') or die "$0 cannot open docs/test_summary.times\n";
        while (<$input>) { # custom parser to avoid dependency on JSON.pm
            # a commit identification line
            if (/^\s\s\[\"([^"]*)\",\d+,\"([^"]*)\"/) { push @runs, { 'time'=>$1, 'comment'=>$2 }; }
            # test script name
            if (/^\s\s\"(.+)\":\[$/x) { $script = $1; }
            # individual test times
            if (/^\s\s\s\[(\d+),\[([0-9,]+)\],\"(.*)\"\],/x) {
                unless (defined $choiceA) { $choiceB = $#runs; $choiceA = $choiceB-1; }
                my $testnumber = $1;
                my @times = split /,/, $2;
                push @times, 0 while @times < 5;
                my $testcomment = $3;
                if ($times[$choiceA] > 0 && $times[$choiceB] > 0) {
                    push @timings, [ [@times], $testcomment, $testnumber, $script];
                }
            }
        }
        close($input);
        @z=(); # Prepare to sort using a Schwartzian transform
        if ($choice eq '1') { # by execution time
            for my $t ( @timings ) { push @z, $$t[0][$choiceB]; }
        }
        elsif ($choice eq '2') { # by relative speedup/slowdown
            for my $t ( @timings ) { push @z, ($$t[0][$choiceB]-$$t[0][$choiceA])/$$t[0][$choiceA]; }
        }
        elsif ($choice eq '3') { # by absolute speedup/slowdown
            for my $t ( @timings ) { push @z, ($$t[0][$choiceB]-$$t[0][$choiceA]); }
        }
        else {
            @sorted = @timings; # choice '4' is unsorted, meaning in order of execution
        }
        @sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ] if @z;
        # Send the results to 'less' for viewing
        open $output, ">", "/tmp/test_summary.$$" or die "$0 cannot write /tmp/test_summary.$$\n";
        print $output "Microseconds and relative change of spec tests $ordername[$choice]. Commits:\n";
        print $output "A: $runs[$choiceA]{'time'} $runs[$choiceA]{'comment'}\n";
        print $output "B: $runs[$choiceB]{'time'} $runs[$choiceB]{'comment'}\n";
        print $output " A B Chg Test description (script#test)\n";
        for $t (@sorted) {
            printf $output "%6d %5d %+3.0f%% %s (%s#%d)\n", $$t[0][$choiceA], $$t[0][$choiceB],
                ($$t[0][$choiceB]-$$t[0][$choiceA])*100/$$t[0][$choiceA], $$t[1], $$t[3], $$t[2];
        }
        close $output;
        system "less --chop-long-lines /tmp/test_summary.$$";
        do { # Prompt for user choice of sort order or commits
            print 'view: sort by 1)time 2)%change 3)change 4)none, other 5)commits q)uit> ';
            $choice = <STDIN>; chomp $choice;
            if ($choice eq '5') { # choose a commit
                for (my $r=0; $r<@runs; ++$r) {
                    print "$r: $runs[$r]{'time'} $runs[$r]{'comment'}\n";
                }
                print 'commit for column A: ';
                $choiceA = <STDIN>; chomp $choiceA;
                print 'commit for column B: ';
                $choiceB = <STDIN>; chomp $choiceB;
            }
        } while index('5', $choice) >= 0; # if user chose commits, must still choose sort order
    } while index('1234', $choice) >= 0; # if valid sort order (not 'q') then do another report
}

package main;

=pod

=head1 NAME

tools/test_summary.pl -- run spectests and make statistical reports

=head1 DESCRIPTION

This test harness, written in Perl 5, runs the Perl 6 specification
test suite. It uses the same Test Anything Protocol (TAP) as, for
example, L<TAP::Harness>, but does not depend on those modules.
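
The TAP lines that the harness recognizes include, for example
(illustrative test numbers, descriptions and timing):

    1..42
    ok 1 - description of a passing test
    not ok 2 - description of a failing test
    ok 3 # SKIP reason for skipping
    not ok 4 # TODO reason it is unimplemented
    # t=1234

The C<# t=> lines carry per-test times in microseconds; Test.pm emits
them only when the PERL6_TEST_TIMES environment variable is set, which
the harness does when run with C<--timing>.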

The names of the tests are listed in t/spectest.data, or another file
whose name is passed on the command line.

=head2 OUTPUT

The harness prints the name of each test script before running it.
After completion it prints the total number of tests passed, failed,
to do, skipped, and planned. The descriptions of any tests failed,
skipped or left to do are also listed.

After running all the tests listed, the harness prints a set of
subtotals per Synopsis.

If you set the REV environment variable (with the first 7 characters of
a git commit id), the harness prints an additional set of grand
totals suitable for adding to F<docs/spectest_progress.csv>.
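
For example (with a hypothetical commit id):

    REV=ab23221 perl tools/test_summary.pl rakudo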

=head1 SIMPLE RELATIVE BENCHMARKING

Too little information can mislead, hence this self-deprecating title.
For example, these measurements overlook variation in test times
('jitter'), kernel versus user process times, and measurement overheads.
But these results are better than no information at all.

If activated, this tool logs the most recent 5 timings in microseconds
in F<docs/test_summary.times> in a specific JSON format, for later
analysis. Measurement and logging add less than 2% to the testing time
and make a log file of about 2.5 MB.

=head2 Methods

=head3 begin

Accepts an optional parameter, the number of timings per test to keep.
Creates a temporary file for new results, and returns an object that
updates the log file. (C<begin> acts as the constructor.)

=head3 log_script_times

Takes these parameters: test script name, reference to an array of times
in microseconds, reference to an array of test description strings.
Appends the results to the temporary log file.

=head3 end

Closes and renames the temporary log file.
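
A sketch of the calling sequence, as the harness above uses it (error
handling omitted):

    my $benchmark = Simple::Relative::Benchmarking::begin(5); # keep 5 timings
    for my $tfile (@tfiles) {
        # ... run the test script, collecting @times and @comments ...
        $benchmark->log_script_times($tfile, \@times, \@comments);
    }
    $benchmark->end(); # close and rename the log file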

=head2 Timing results file

All results are stored in F<docs/test_summary.times> in a specific JSON
format. With 35000 test result lines and 5 runs it occupies just under
2.5 MB.

Here is an example with a few semi-fictitious results:

    {"test_history":[
      ["2010-05-05 10:15:45",46276,"925629d Make $x does (R1, R2) work."],
      ["2010-05-07 08:58:07",46276,"5713af2 run two more test files"],
      ["2010-05-08 18:08:43",46405,"ab23221 bump PARROT_REVISION"],
      ["2010-05-09 05:53:25",46405,"c49d32b run S04-phasers/rvalues.t"],
      ["2010-05-10 00:44:46",46405,"118f4aa Overhaul sqrt for Numeric / Real."]
     ],
     "test_microseconds":{
      "S02-builtin_data_types/anon_block.rakudo":[
       [1,[6139,7559,6440,6289,5520],"The object is-a 'Sub()'"],
       [2,[6610,6599,6690,6580,6010],"sub { } works"]
      ],
      "S02-builtin_data_types/array.rakudo":[
       [1,[9100,8889,9739,9140,9169],"for 1, 2, 3 does 3 iterations"],
       [2,[5650,5599,6119,9819,5140],"for (1, 2, 3).item 3 iterations"],
       [3,[3920,3770,4190,4410,3350],"for [1, 2, 3] does one iteration"]
      ]
     }
    }

The "test_history" section lists the starting times for all the runs of
F<tools/test_summary.pl> that are recorded in the file. Then the
"test_microseconds" records show each test filename, possibly fudged,
followed by the test numbers, followed by the times obtained from each
run. If a test has fewer than the usual number of timings, the timings
will be from the most recent test runs.

The file is read and written by custom code and not a JSON module, to
reduce dependencies. Altering the file format might cause reading to
fail and could result in data loss. General-purpose JSON parsers should
be able to read the data. For example, the following ranks the tests
from best speedup to worst slowdown.

    #!/usr/bin/perl
    use File::Slurp qw( slurp );
    use JSON;
    my $log_text = slurp('docs/test_summary.times');
    my $log = JSON->new->decode( $log_text );
    # Flatten the data structure to a 2-D array of nonzero test times
    my @timings;
    my $script_hash = $$log{'test_microseconds'};
    for my $script_name ( sort keys %$script_hash ) {
        my $test_list = $$script_hash{$script_name};
        for my $t ( @$test_list ) {
            my $times_count = @{$$t[1]};
            if ( $times_count >= 2 and ${$$t[1]}[$times_count-1] > 0 ) {
                push @timings, [$script_name, $$t[0], $$t[2], ${$$t[1]}[$times_count-2], ${$$t[1]}[$times_count-1] ];
            }
        }
    }
    # Sort the timings into improved/worsened order with a Schwartzian transform
    my @z; for my $t ( @timings ) { push @z, ($$t[4]-$$t[3])/$$t[4]; }
    my @sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ];
    # Display the results: quicker is minus, slower is plus.
    for my $s ( @sorted ) {
        printf "%+3.0f%% %6d %6d %s:%d:%s\n",
           ($$s[4]-$$s[3])*100/$$s[3], $$s[3], $$s[4], $$s[0], $$s[1], $$s[2];
    } # %change, prev-time, latest-time, script, test-num, test-desc

A second example shows another way to read the results file, and ranks
the tests from most to least consistent in execution time.

    #!/usr/bin/perl
    use JSON;
    my $log_text = qx{$^X -MExtUtils::Command -e cat docs/test_summary.times};
    my $log = JSON->new->decode( $log_text );
    # Flatten the data structure to a 2-D array of nonzero test times
    my @timings;
    my $script_hash = $$log{'test_microseconds'};
    for my $script_name ( sort keys %$script_hash ) {
        my $test_list = $$script_hash{$script_name};
        for my $t ( @$test_list ) {
            my $times_count = @{$$t[1]};
            if ( $times_count >= 2 and ${$$t[1]}[$times_count-1] > 0 ) {
                my $min = my $max = ${$$t[1]}[0];
                for my $i (1..$times_count-1) {
                    $min = ${$$t[1]}[$i] if $min > ${$$t[1]}[$i];
                    $max = ${$$t[1]}[$i] if $max < ${$$t[1]}[$i];
                }
                push @timings, [$script_name, $$t[0], $$t[2], $min, $max ] if $min > 0;
            }
        }
    }
    # Sort the timings into most/least consistent order by Schwartzian transform
    my @z; for my $t ( @timings ) { push @z, ($$t[4]-$$t[3])/$$t[3]; }
    my @sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ];
    # Display the results from most to least consistent
    for my $s ( @sorted ) {
        printf "%3.1f%% %6d %6d %s:%d:%s\n",
            ($$s[4]-$$s[3])*100/$$s[3], $$s[3], $$s[4], $$s[0], $$s[1], $$s[2];
    } # %difference, min-time, max-time, script, test-num, test-desc

=head2 TODO

Detect changes in number of tests or descriptions of tests in each
test script, and discard all previous results for that script if there
has been a change. Consider whether to log total execution time per
test script.

Analyse and report useful results, such as the slowest n tests.

Parse the C<say now> output as well as C<print pir::__time()>.

=head1 SEE ALSO

The L<perlperf> documentation. The L<http://json.org/> site.

=cut