From e27a9230d6ad515bf806db0145ec627aa618362b Mon Sep 17 00:00:00 2001 From: Kent Fredric Date: Wed, 17 Sep 2014 21:32:30 +1200 Subject: [PATCH] Initial working model --- .gitignore | 2 + .mailmap | 3 + .travis.yml | 56 ++++ LICENSE | 379 ++++++++++++++++++++++ README.mkdn | 215 ++++++++++++ dist.ini | 214 +++++++++++- dist.ini.meta | 22 ++ examples/math/aggregate.pl | 59 ++++ examples/math/aggregate_histogram.gnuplot | 26 ++ examples/math/math.pl | 88 +++++ examples/math/plot.gnuplot | 49 +++ examples/mkbatch/mkbatch.pl | 59 ++++ examples/mkbatch/plot.gnuplot | 42 +++ examples/shuffle/plot.gnuplot | 42 +++ examples/shuffle/shuffle.pl | 55 ++++ lib/Benchmark/CSV.pm | 345 +++++++++++++++++++- maint/perlcritic.rc.gen.pl | 5 +- misc/perlcritic.deps | 1 + perlcritic.rc | 3 +- t/basic.t | 36 ++ t/basic_hashconstruct.t | 31 ++ t/output_fh_set.t | 48 +++ t/sample_size_set.t | 42 +++ weaver.ini | 2 + 24 files changed, 1800 insertions(+), 24 deletions(-) create mode 100644 .mailmap create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 README.mkdn create mode 100644 dist.ini.meta create mode 100644 examples/math/aggregate.pl create mode 100644 examples/math/aggregate_histogram.gnuplot create mode 100644 examples/math/math.pl create mode 100644 examples/math/plot.gnuplot create mode 100644 examples/mkbatch/mkbatch.pl create mode 100644 examples/mkbatch/plot.gnuplot create mode 100644 examples/shuffle/plot.gnuplot create mode 100644 examples/shuffle/shuffle.pl create mode 100644 t/basic.t create mode 100644 t/basic_hashconstruct.t create mode 100644 t/output_fh_set.t create mode 100644 t/sample_size_set.t diff --git a/.gitignore b/.gitignore index 471a563..4e670f7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .build Benchmark-CSV-* +examples/*/*.csv +examples/*/*.png diff --git a/.mailmap b/.mailmap new file mode 100644 index 0000000..ca81607 --- /dev/null +++ b/.mailmap @@ -0,0 +1,3 @@ +# git help shortlog +# newname oldname + diff --git 
a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..ee8ce89 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,56 @@ +language: perl +matrix: + allow_failures: + - perl: "5.8" + - env: STERILIZE_ENV=0 RELEASE_TESTING=1 AUTHOR_TESTING=1 + - env: STERILIZE_ENV=0 DEVELOPER_DEPS=1 + include: + - perl: "5.21" + env: STERILIZE_ENV=0 COVERAGE_TESTING=1 + - perl: "5.21" + env: STERILIZE_ENV=1 + - perl: "5.8" + env: STERILIZE_ENV=0 + - perl: "5.10" + env: STERILIZE_ENV=0 + - perl: "5.12" + env: STERILIZE_ENV=0 + - perl: "5.14" + env: STERILIZE_ENV=0 + - perl: "5.16" + env: STERILIZE_ENV=0 + - perl: "5.20" + env: STERILIZE_ENV=0 + - perl: "5.21" + env: STERILIZE_ENV=0 + - perl: "5.8" + env: STERILIZE_ENV=1 + - perl: "5.10" + env: STERILIZE_ENV=1 + - perl: "5.20" + env: STERILIZE_ENV=1 + - perl: "5.21" + env: STERILIZE_ENV=0 DEVELOPER_DEPS=1 + - perl: "5.21" + env: STERILIZE_ENV=0 RELEASE_TESTING=1 AUTHOR_TESTING=1 +before_install: + - perlbrew list + - time git clone --depth 10 https://github.com/kentfredric/travis-scripts.git maint-travis-ci + - time git -C ./maint-travis-ci reset --hard master + - time perl ./maint-travis-ci/branch_reset.pl + - time perl ./maint-travis-ci/sterilize_env.pl +install: + - time perl ./maint-travis-ci/install_deps_early.pl + - time perl ./maint-travis-ci/install_deps.pl +before_script: + - time perl ./maint-travis-ci/before_script.pl +script: + - time perl ./maint-travis-ci/script.pl +after_failure: + - perl ./maint-travis-ci/report_fail_ctx.pl +branches: + only: + - "master" + - "build/master" + - "releases" + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f54a236 --- /dev/null +++ b/LICENSE @@ -0,0 +1,379 @@ +This software is copyright (c) 2014 by Kent Fredric . + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. 
+ +Terms of the Perl programming language system itself + +a) the GNU General Public License as published by the Free + Software Foundation; either version 1, or (at your option) any + later version, or +b) the "Artistic License" + +--- The GNU General Public License, Version 1, February 1989 --- + +This software is Copyright (c) 2014 by Kent Fredric . + +This is free software, licensed under: + + The GNU General Public License, Version 1, February 1989 + + GNU GENERAL PUBLIC LICENSE + Version 1, February 1989 + + Copyright (C) 1989 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The license agreements of most software companies try to keep users +at the mercy of those companies. By contrast, our General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. The +General Public License applies to the Free Software Foundation's +software and to any other program whose authors commit to using it. +You can use it for your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Specifically, the General Public License is designed to make +sure that you have the freedom to give away or sell copies of free +software, that you receive source code or can get it if you want it, +that you can change the software or use pieces of it in new free +programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. 
+ + For example, if you distribute copies of a such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must tell them their rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any program or other work which +contains a notice placed by the copyright holder saying it may be +distributed under the terms of this General Public License. The +"Program", below, refers to any such program or work, and a "work based +on the Program" means either the Program or any work containing the +Program or a portion of it, either verbatim or with modifications. Each +licensee is addressed as "you". + + 1. You may copy and distribute verbatim copies of the Program's source +code as you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this +General Public License and to the absence of any warranty; and give any +other recipients of the Program a copy of this General Public License +along with the Program. You may charge a fee for the physical act of +transferring a copy. 
+ + 2. You may modify your copy or copies of the Program or any portion of +it, and copy and distribute such modifications under the terms of Paragraph +1 above, provided that you also do the following: + + a) cause the modified files to carry prominent notices stating that + you changed the files and the date of any change; and + + b) cause the whole of any work that you distribute or publish, that + in whole or in part contains the Program or any part thereof, either + with or without modifications, to be licensed at no charge to all + third parties under the terms of this General Public License (except + that you may choose to grant warranty protection to some or all + third parties, at your option). + + c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive use + in the simplest and most usual way, to print or display an + announcement including an appropriate copyright notice and a notice + that there is no warranty (or else, saying that you provide a + warranty) and that users may redistribute the program under these + conditions, and telling the user how to view a copy of this General + Public License. + + d) You may charge a fee for the physical act of transferring a + copy, and you may at your option offer warranty protection in + exchange for a fee. + +Mere aggregation of another independent work with the Program (or its +derivative) on a volume of a storage or distribution medium does not bring +the other work under the scope of these terms. + + 3. 
You may copy and distribute the Program (or a portion or derivative of +it, under Paragraph 2) in object code or executable form under the terms of +Paragraphs 1 and 2 above provided that you also do one of the following: + + a) accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of + Paragraphs 1 and 2 above; or, + + b) accompany it with a written offer, valid for at least three + years, to give any third party free (except for a nominal charge + for the cost of distribution) a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of + Paragraphs 1 and 2 above; or, + + c) accompany it with the information you received as to where the + corresponding source code may be obtained. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form alone.) + +Source code for a work means the preferred form of the work for making +modifications to it. For an executable file, complete source code means +all the source code for all modules it contains; but, as a special +exception, it need not include source code for modules which are standard +libraries that accompany the operating system on which the executable +file runs, or for standard header files or definitions files that +accompany that operating system. + + 4. You may not copy, modify, sublicense, distribute or transfer the +Program except as expressly provided under this General Public License. +Any attempt otherwise to copy, modify, sublicense, distribute or transfer +the Program is void, and will automatically terminate your rights to use +the Program under this License. However, parties who have received +copies, or rights to use copies, from you under this General Public +License will not have their licenses terminated so long as such parties +remain in full compliance. + + 5. 
By copying, distributing or modifying the Program (or any work based +on the Program) you indicate your acceptance of this license to do so, +and all its terms and conditions. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the original +licensor to copy, distribute or modify the Program subject to these +terms and conditions. You may not impose any further restrictions on the +recipients' exercise of the rights granted herein. + + 7. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of the license which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +the license, you may choose any version ever published by the Free Software +Foundation. + + 8. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to humanity, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + + To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey +the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 1, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19xx name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the +appropriate parts of the General Public License. Of course, the +commands you use may be called something other than `show w' and `show +c'; they could even be mouse-clicks or menu items--whatever suits your +program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + program `Gnomovision' (a program to direct compilers to make passes + at assemblers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +That's all there is to it! + + +--- The Artistic License 1.0 --- + +This software is Copyright (c) 2014 by Kent Fredric . 
+ +This is free software, licensed under: + + The Artistic License 1.0 + +The Artistic License + +Preamble + +The intent of this document is to state the conditions under which a Package +may be copied, such that the Copyright Holder maintains some semblance of +artistic control over the development of the package, while giving the users of +the package the right to use and distribute the Package in a more-or-less +customary fashion, plus the right to make reasonable modifications. + +Definitions: + + - "Package" refers to the collection of files distributed by the Copyright + Holder, and derivatives of that collection of files created through + textual modification. + - "Standard Version" refers to such a Package if it has not been modified, + or has been modified in accordance with the wishes of the Copyright + Holder. + - "Copyright Holder" is whoever is named in the copyright or copyrights for + the package. + - "You" is you, if you're thinking about copying or distributing this Package. + - "Reasonable copying fee" is whatever you can justify on the basis of media + cost, duplication charges, time of people involved, and so on. (You will + not be required to justify it to the Copyright Holder, but only to the + computing community at large as a market that must bear the fee.) + - "Freely Available" means that no fee is charged for the item itself, though + there may be fees involved in handling the item. It also means that + recipients of the item may redistribute it under the same conditions they + received it. + +1. You may make and give away verbatim copies of the source form of the +Standard Version of this Package without restriction, provided that you +duplicate all of the original copyright notices and associated disclaimers. + +2. You may apply bug fixes, portability fixes and other modifications derived +from the Public Domain or from the Copyright Holder. A Package modified in such +a way shall still be considered the Standard Version. + +3. 
You may otherwise modify your copy of this Package in any way, provided that +you insert a prominent notice in each changed file stating how and when you +changed that file, and provided that you do at least ONE of the following: + + a) place your modifications in the Public Domain or otherwise make them + Freely Available, such as by posting said modifications to Usenet or an + equivalent medium, or placing the modifications on a major archive site + such as ftp.uu.net, or by allowing the Copyright Holder to include your + modifications in the Standard Version of the Package. + + b) use the modified Package only within your corporation or organization. + + c) rename any non-standard executables so the names do not conflict with + standard executables, which must also be provided, and provide a separate + manual page for each non-standard executable that clearly documents how it + differs from the Standard Version. + + d) make other distribution arrangements with the Copyright Holder. + +4. You may distribute the programs of this Package in object code or executable +form, provided that you do at least ONE of the following: + + a) distribute a Standard Version of the executables and library files, + together with instructions (in the manual page or equivalent) on where to + get the Standard Version. + + b) accompany the distribution with the machine-readable source of the Package + with your modifications. + + c) accompany any non-standard executables with their corresponding Standard + Version executables, giving the non-standard executables non-standard + names, and clearly documenting the differences in manual pages (or + equivalent), together with instructions on where to get the Standard + Version. + + d) make other distribution arrangements with the Copyright Holder. + +5. You may charge a reasonable copying fee for any distribution of this +Package. You may charge any fee you choose for support of this Package. 
You +may not charge a fee for this Package itself. However, you may distribute this +Package in aggregate with other (possibly commercial) programs as part of a +larger (possibly commercial) software distribution provided that you do not +advertise this Package as a product of your own. + +6. The scripts and library files supplied as input to or produced as output +from the programs of this Package do not automatically fall under the copyright +of this Package, but belong to whomever generated them, and may be sold +commercially, and may be aggregated with this Package. + +7. C or perl subroutines supplied by you and linked into this Package shall not +be considered part of this Package. + +8. The name of the Copyright Holder may not be used to endorse or promote +products derived from this software without specific prior written permission. + +9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +The End + diff --git a/README.mkdn b/README.mkdn new file mode 100644 index 0000000..fb8072d --- /dev/null +++ b/README.mkdn @@ -0,0 +1,215 @@ +# NAME + +Benchmark::CSV - Report raw timing results in CSV-style format for advanced processing. + +# VERSION + +version 0.001000 + +# SYNOPSIS + + use Benchmark::CSV; + + my $benchmark = Benchmark::CSV->new( + output => './test.csv', + sample_size => 10, + ); + + $benchmark->add_instance( 'method_a' => sub {}); + $benchmark->add_instance( 'method_b' => sub {}); + + $benchmark->run_iterations(100_000); + +# RATIONALE. + +I've long found all the other bench-marking utilities well meaning, but easily confusing. 
+ +My biggest misgiving is that they give you one or two values which they have decided are "the time" your code took; +whether it's an average, a median, or some other algorithm ( such as in `Benchmark::Dumb` ), they all amount to basically giving +you a data point, which you have to take for granted. + +That data point may also change wildly between test runs due to computer load or other factors. + +The flaw, as I see it, is trying to convey what is essentially a _spectrum_ of results as a single point. + +They also run each test sequentially, as in: + + start testing -> + + start test one -> + + <-- end test one + + record data + + start test two -> + + <-- end test two + + record data + + <-- stop testing. + +And that strikes me as incredibly prone to the batches getting different results due to CPU loading variations, +such that any benchmark run this way on anything other than a perfectly idle processor +without so much as an `init` subsystem stealing CPU time, and with your kernel delivering IO +perfectly the whole time, is unreliable. + +And the final numbers don't really seem to take that into consideration. + +`Benchmark::Dumb` at least gives you variation data, but it's rather hard to compare and visualize the results it gives to gain +meaningful insight. + +So, I looked to modeling the data differently, and happened to accidentally throw some hand-collected benchmark data into a +Google Spreadsheet Histogram plot, and found it hugely enlightening on what was really going on. 
+ +One recurring observation I noticed is that code run-time seems to have a very lop-sided distribution: + + | ++ + | |++ + | | | + | | | + | | | + | | +++ + | | | + | ++ ++++++++ + | + +++++++++++++++++++++++ + 0 +------------------------------------- + 0 + +This suggests to me that, unlike many things people usually use statistics for, +where you have a bunch of things evenly on both sides of the mode, code has an _inherent_ minimum run time, +which you might see if your system has all factors in "ideal" conditions, and it has a closely following _sub-optimal_ but +_common_ run time, which I imagine you see because the system can't deliver every cycle of code +in perfect situations every time; even the kernel is selfish and says "Well, if I let your code have exactly 100% CPU for as +long as you wanted it, I doubt even kernel space would be able to do anything till you were quite done". +So observing the minimum time `AND` the median seems to me useful for comparing algorithm efficiency. + +Observing the maximums is useful too; however, those values trend towards being less useful, as they're likely to be impacted by +CPU randomness slowing things down. + +# METHODS + +## `add_instance` + +Add a test block. + + ->add_instance( name => sub { } ); + +**NOTE:** You can only add test instances prior to executing the tests. + +After executing tests, the number of columns and the column headings become `finalized`. + +This is because of how the CSV file is written in parallel with the test batches. + +CSV is written headers first, top to bottom, one column at a time. + +So adding a new column is impossible after the headers have been written without starting over. + +## `new` + +Create a benchmark object. 
+ + my $instance = Benchmark::CSV->new( \%hash ); + my $instance = Benchmark::CSV->new( %hash ); + + %hash = ( + sample_size => # number of times to call each sub in a sample + output => # A file path to write to + output_fh => # An output filehandle to write to + ); + +## `sample_size` + +The number of times to call each sub in a "Sample". + +A sample is a block of timed code. + +For instance: + + ->sample_size(4); + ->add_instance( x => $y ); + ->run_iterations(40); + +This will create a timer block similar to below. + + my $start = time(); + # Unrolled, because benchmarking indicated unrolling was faster. + $y->(); + $y->(); + $y->(); + $y->(); + return time() - $start; + +That block will then be called 10 times ( 40 total code executions batched into 10 groups of 4 ) +and return 10 time values. + +### get:`sample_size` + + my $size = $bench->sample_size; + +Value will default to 1 if not passed during construction. + +### set:`sample_size` + + $bench->sample_size(10); + +Can be performed at any time prior to, but not after, running tests. + +## `output_fh` + +An output `filehandle` to write very sloppy `CSV` data to. + +Results will be in Columns, sorted by column name alphabetically. + +`output_fh` defaults to `*STDOUT`, or opens a file passed to the constructor as `output` for writing. + +### get:`output_fh` + + my $fh = $bench->output_fh; + +Either \*STDOUT or an opened `filehandle`. + +### set:`output_fh` + + $bench->output_fh( \*STDERR ); + +Can be set at any time prior to, but not after, running tests. + +## `run_iterations` + +Executes the attached tests `n` times in batches of [`sample_size`](#sample_size). + + ->run_iterations( 10_000_000 ); + +Because of how it works, simply spooling results at the bottom of the data file, you can call this method +multiple times as necessary and inject more results. + +For instance, this could be used to give a progress report. + + *STDOUT->autoflush(1); + print "[__________]\r["; + for ( 1 .. 
10 ) { + $bench->run_iterations( 1_000_000 ); + print "#"; + } + print "]\n"; + +This is also how you can do timed batches: + + my $start = [gettimeofday]; + # Just execute as much as possible until 10 seconds of wallclock pass. + while( tv_interval( $start, [ gettimeofday ]) < 10 ) { + $bench->run_iterations( 1_000 ); + } + +# AUTHOR + +Kent Fredric + +# COPYRIGHT AND LICENSE + +This software is copyright (c) 2014 by Kent Fredric . + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. diff --git a/dist.ini b/dist.ini index b90120c..c24b201 100644 --- a/dist.ini +++ b/dist.ini @@ -1,22 +1,200 @@ -; Generated by Dist::Zilla::Plugin::Author::KENTNL::DistINI version 2.020001 at Tue Sep 16 03:45:42 2014 -name = Benchmark-CSV -author = Kent Fredric -license = Perl_5 +; This file is generated from dist.ini.meta by dzil bakeini. +; Edit that file or the bundles contained within for long-term changes. +name = Benchmark-CSV +author = Kent Fredric +license = Perl_5 copyright_holder = Kent Fredric -; Uncomment this to bootstrap via self -; [Bootstrap::lib] - -[@Author::KENTNL] -:version = 2.020001 -normal_form = numify -mantissa = 6 -toolkit = eumm -toolkit_hardness = soft -bumpversions = 1 -copyfiles = LICENSE -srcreadme = mkdn -twitter_hash_tags = #perl #cpan -; auto_prereqs_skip = File::Find +[MetaConfig / @Author::KENTNL/MetaConfig] + +[GithubMeta / @Author::KENTNL/GithubMeta] +issues = 1 + +[MetaProvides::Package / @Author::KENTNL/MetaProvides::Package] +:version = 1.14000001 + +[MetaData::BuiltWith / @Author::KENTNL/MetaData::BuiltWith] +show_config = 1 +show_uname = 1 +uname_args = -s -o -r -m -i + +[Git::Contributors / @Author::KENTNL/Git::Contributors] +:version = 0.006 +include_authors = 0 +include_releaser = 0 +order_by = name + +[Git::GatherDir / @Author::KENTNL/Git::GatherDir] +exclude_filename = README +exclude_filename = README.mkdn +exclude_filename = README.pod 
+exclude_filename = LICENSE +include_dotfiles = 1 + +[License / @Author::KENTNL/License] + +[MetaJSON / @Author::KENTNL/MetaJSON] + +[MetaYAML / @Author::KENTNL/MetaYAML] + +[Manifest / @Author::KENTNL/Manifest] + +[CopyFilesFromBuild / @Author::KENTNL/CopyXBuild] +copy = LICENSE + +[MetaTests / @Author::KENTNL/MetaTests] + +[PodCoverageTests / @Author::KENTNL/PodCoverageTests] + +[PodSyntaxTests / @Author::KENTNL/PodSyntaxTests] + +[Test::ReportPrereqs / @Author::KENTNL/Test::ReportPrereqs] + +[Test::Kwalitee / @Author::KENTNL/Test::Kwalitee] + +[EOLTests / @Author::KENTNL/EOLTests] +trailing_whitespace = 1 + +[Test::MinimumVersion / @Author::KENTNL/Test::MinimumVersion] + +[Test::Compile::PerFile / @Author::KENTNL/Test::Compile::PerFile] + +[Test::Perl::Critic / @Author::KENTNL/Test::Perl::Critic] + +[ManifestSkip / @Author::KENTNL/ManifestSkip] + +[RewriteVersion::Sanitized / @Author::KENTNL/RewriteVersion::Sanitized] +mantissa = 6 +normal_form = numify + +[PodWeaver / @Author::KENTNL/PodWeaver] +replacer = replace_with_blank + +[AutoPrereqs / @Author::KENTNL/AutoPrereqs] + +[Prereqs / @Author::KENTNL/BundleDevelSuggests] +-phase = develop +-type = suggests +Dist::Zilla::App::Command::bakeini = 0.001000 +Dist::Zilla::PluginBundle::Author::KENTNL = 2.020001 + +[MinimumPerl / @Author::KENTNL/MinimumPerl] + +[Authority / @Author::KENTNL/Authority] +:version = 1.006 +authority = cpan:KENTNL +do_metadata = 1 +locate_comment = 1 + +[MakeMaker / @Author::KENTNL/MakeMaker] +default_jobs = 10 + +[Author::KENTNL::RecommendFixes / @Author::KENTNL/Author::KENTNL::RecommendFixes] +:version = 0.001001 + +[ReadmeFromPod / @Author::KENTNL/ReadmeFromPod] + +[ReadmeAnyFromPod / @Author::KENTNL/ReadmeAnyFromPod] +filename = README.mkdn +location = root +type = markdown + +[Test::CPAN::Changes / @Author::KENTNL/Test::CPAN::Changes] + +[RunExtraTests / @Author::KENTNL/RunExtraTests] +default_jobs = 10 + +[TestRelease / @Author::KENTNL/TestRelease] + +[ConfirmRelease / 
@Author::KENTNL/ConfirmRelease] + +[Git::Check / @Author::KENTNL/Git::Check] +filename = Changes + +[Git::Commit / @Author::KENTNL/commit_dirty_files] + +[Git::Tag / @Author::KENTNL/tag_master] +tag_format = %v-source + +[Git::NextRelease / @Author::KENTNL/Git::NextRelease] +format = %v %{yyyy-MM-dd'T'HH:mm:ss}dZ +time_zone = UTC + +[BumpVersionAfterRelease / @Author::KENTNL/BumpVersionAfterRelease] + +[Git::Commit / @Author::KENTNL/commit_release_changes] +allow_dirty_match = ^lib/ + +[Git::CommitBuild / @Author::KENTNL/Git::CommitBuild] +release_branch = releases + +[Git::Tag / @Author::KENTNL/tag_release] +branch = releases +tag_format = %v + +[UploadToCPAN / @Author::KENTNL/UploadToCPAN] + +[Twitter / @Author::KENTNL/Twitter] +hash_tags = #perl #cpan +tweet_url = https://metacpan.org/release/{{$AUTHOR_UC}}/{{$DIST}}-{{$VERSION}}{{$TRIAL}}#whatsnew +url_shortener = none + +[Prereqs::Recommend::MatchInstalled / @Author::KENTNL/Prereqs::Recommend::MatchInstalled] +modules = ExtUtils::MakeMaker +modules = Test::More + +[Prereqs::Recommend::MatchInstalled / @Author::KENTNL/always_latest_develop_bundle] +applyto_map = develop.suggests = develop.suggests +applyto_phase = develop +modules = Dist::Zilla::PluginBundle::Author::KENTNL +modules = Dist::Zilla::App::Command::bakeini + +[Prereqs / @Author::KENTNL/::Role::BundleDeps] +-phase = develop +-relationship = requires +Dist::Zilla::Plugin::Author::KENTNL::RecommendFixes = 0.001001 +Dist::Zilla::Plugin::Authority = 1.006 +Dist::Zilla::Plugin::AutoPrereqs = 0 +Dist::Zilla::Plugin::BumpVersionAfterRelease = 0 +Dist::Zilla::Plugin::ConfirmRelease = 0 +Dist::Zilla::Plugin::CopyFilesFromBuild = 0 +Dist::Zilla::Plugin::EOLTests = 0 +Dist::Zilla::Plugin::Git::Check = 0 +Dist::Zilla::Plugin::Git::Commit = 0 +Dist::Zilla::Plugin::Git::CommitBuild = 0 +Dist::Zilla::Plugin::Git::Contributors = 0.006 +Dist::Zilla::Plugin::Git::GatherDir = 0 +Dist::Zilla::Plugin::Git::NextRelease = 0 +Dist::Zilla::Plugin::Git::Tag = 0 
+Dist::Zilla::Plugin::GithubMeta = 0 +Dist::Zilla::Plugin::License = 0 +Dist::Zilla::Plugin::MakeMaker = 0 +Dist::Zilla::Plugin::Manifest = 0 +Dist::Zilla::Plugin::ManifestSkip = 0 +Dist::Zilla::Plugin::MetaConfig = 0 +Dist::Zilla::Plugin::MetaData::BuiltWith = 0 +Dist::Zilla::Plugin::MetaJSON = 0 +Dist::Zilla::Plugin::MetaProvides::Package = 1.14000001 +Dist::Zilla::Plugin::MetaTests = 0 +Dist::Zilla::Plugin::MetaYAML = 0 +Dist::Zilla::Plugin::MinimumPerl = 0 +Dist::Zilla::Plugin::PodCoverageTests = 0 +Dist::Zilla::Plugin::PodSyntaxTests = 0 +Dist::Zilla::Plugin::PodWeaver = 0 +Dist::Zilla::Plugin::Prereqs = 0 +Dist::Zilla::Plugin::Prereqs::Recommend::MatchInstalled = 0 +Dist::Zilla::Plugin::ReadmeAnyFromPod = 0 +Dist::Zilla::Plugin::ReadmeFromPod = 0 +Dist::Zilla::Plugin::RewriteVersion::Sanitized = 0 +Dist::Zilla::Plugin::RunExtraTests = 0 +Dist::Zilla::Plugin::Test::CPAN::Changes = 0 +Dist::Zilla::Plugin::Test::Compile::PerFile = 0 +Dist::Zilla::Plugin::Test::Kwalitee = 0 +Dist::Zilla::Plugin::Test::MinimumVersion = 0 +Dist::Zilla::Plugin::Test::Perl::Critic = 0 +Dist::Zilla::Plugin::Test::ReportPrereqs = 0 +Dist::Zilla::Plugin::TestRelease = 0 +Dist::Zilla::Plugin::Twitter = 0 +Dist::Zilla::Plugin::UploadToCPAN = 0 [Prereqs] diff --git a/dist.ini.meta b/dist.ini.meta new file mode 100644 index 0000000..b90120c --- /dev/null +++ b/dist.ini.meta @@ -0,0 +1,22 @@ +; Generated by Dist::Zilla::Plugin::Author::KENTNL::DistINI version 2.020001 at Tue Sep 16 03:45:42 2014 +name = Benchmark-CSV +author = Kent Fredric +license = Perl_5 +copyright_holder = Kent Fredric + +; Uncomment this to bootstrap via self +; [Bootstrap::lib] + +[@Author::KENTNL] +:version = 2.020001 +normal_form = numify +mantissa = 6 +toolkit = eumm +toolkit_hardness = soft +bumpversions = 1 +copyfiles = LICENSE +srcreadme = mkdn +twitter_hash_tags = #perl #cpan +; auto_prereqs_skip = File::Find + +[Prereqs] diff --git a/examples/math/aggregate.pl b/examples/math/aggregate.pl new file mode 100644 
index 0000000..d2d0996 --- /dev/null +++ b/examples/math/aggregate.pl @@ -0,0 +1,59 @@ +#!/usr/bin/env perl +# FILENAME: aggregate.pl +# CREATED: 09/16/14 23:34:03 by Kent Fredric (kentnl) +# ABSTRACT: Aggregate csv entries. + +use strict; +use warnings; +use utf8; + +my $bucket_size = 0.00001; + +use Path::Tiny; +use FindBin; +use POSIX qw( floor ); +my $source = path($FindBin::Bin)->child('out.csv')->openr; +my $header = scalar <$source>; +chomp $header; +my (@headings) = split q/,/, $header; +my $buckets = []; + +while ( my $line = <$source> ) { + chomp $line; + my (@fields) = split q/,/, $line; + for ( 0 .. $#fields ) { + my $wrapped = $bucket_size * floor( $fields[$_] / $bucket_size ); + $buckets->[$_] ||= {}; + $buckets->[$_]->{$wrapped} ||= 0; + $buckets->[$_]->{$wrapped}++; + } +} + +my $sbuckets = []; +for my $sbucket ( @{$buckets} ) { + my $obucket = []; + for my $key ( sort { $a <=> $b } keys %{$sbucket} ) { + push @{$obucket}, [ $key, sprintf "%f", $sbucket->{$key} ]; + } + push @{$sbuckets}, $obucket; +} + +my $target = path($FindBin::Bin)->child('out_hist.csv')->openw; + +printf {$target} "%s\n", join q[,], map { ( q[], $_ ) } @headings; + +sub has_sbucket { + for my $bucket ( @{$sbuckets} ) { + return 1 if @{$bucket}; + } +} + +while ( has_sbucket() ) { + my @row; + for my $bucket ( @{$sbuckets} ) { + my $item = shift @{$bucket}; + push @row, @{ $item || [ '', '' ] }; + } + printf {$target} "%s\n", join q[,], @row; +} + diff --git a/examples/math/aggregate_histogram.gnuplot b/examples/math/aggregate_histogram.gnuplot new file mode 100644 index 0000000..fd8079d --- /dev/null +++ b/examples/math/aggregate_histogram.gnuplot @@ -0,0 +1,26 @@ + +#set xrange [0.0006:0.001] + +set datafile separator ',' + +set style fill transparent solid 0.05 + +set key autotitle columnheader + +set grid ytics lc rgb "#777777" lt 0 back + +set boxwidth 0.9 relative + +set terminal pngcairo enhanced rounded size 1300,768 font "Droid Sans" +set output 'math_histogram.png' + 
+set xtics rotate by 45 right +#set terminal canvas standalone mousing size 800,600 rounded enhanced +#set output '/tmp/mathplot.html' +#set terminal dumb size 80,25 + +set style histogram gap 1 +set style data histograms +set samples 10 +plot for [c=2:14:2] \ + 'out_hist.csv' using c:xticlabels(1) diff --git a/examples/math/math.pl b/examples/math/math.pl new file mode 100644 index 0000000..332e21b --- /dev/null +++ b/examples/math/math.pl @@ -0,0 +1,88 @@ + +use strict; +use warnings; + +use Benchmark::CSV; +use Path::Tiny; +use FindBin; + +chdir $FindBin::Bin; + +my $outfile = path($FindBin::Bin)->child("out.csv"); +my $imagefile = path($FindBin::Bin)->child("math.png"); +my $histfile = path($FindBin::Bin)->child("math_histogram.png"); + +my $bench = Benchmark::CSV->new( + sample_size => 10_000, + output => $outfile, +); + +my $rint = '(int(rand(32768)) + 1)'; +my $rint_small = '(int(rand(32768)) + 1)'; +my $nrint = '(0 - int(rand(32768)) + 1)'; +my $nrint_small = '(0 - int(rand(32768)) + 1)'; + +if ( $ENV{DOUBLE_RANDOM} ) { + $bench->add_instance( 'small: x + y' => eval qq[ sub { $rint_small + $rint_small } ] ); + $bench->add_instance( 'large: x + y' => eval qq[ sub { $rint + $rint } ] ); + $bench->add_instance( 'small: x - y' => eval qq[ sub { $rint_small - $rint_small } ] ); + $bench->add_instance( 'large: x - y' => eval qq[ sub { $rint - $rint } ] ); + + if ( $ENV{ALL} ) { + $bench->add_instance( 'x * y' => eval qq[ sub { $rint * $rint } ] ); + $bench->add_instance( 'x + -y' => eval qq[ sub { $rint + $nrint } ] ); + $bench->add_instance( 'x - -y' => eval qq[ sub { $rint - $nrint } ] ); + $bench->add_instance( 'x ** y' => eval qq[ sub { $rint ** $rint } ] ); + $bench->add_instance( 'x / y' => eval qq[ sub { $rint / $rint } ] ); + } + +} +elsif ( $ENV{SINGLE_RANDOM} ) { + $bench->add_instance( 'small: x + 1' => eval qq[ sub { $rint_small + 1 } ] ); + $bench->add_instance( 'large: x + 1' => eval qq[ sub { $rint + 1 } ] ); + $bench->add_instance( 'small: x - 1' 
=> eval qq[ sub { $rint_small - 1 } ] ); + $bench->add_instance( 'large: x - 1' => eval qq[ sub { $rint - 1 } ] ); + + if ( $ENV{ALL} ) { + $bench->add_instance( 'x * 1' => eval qq[ sub { $rint * 1 } ] ); + $bench->add_instance( 'x + -1' => eval qq[ sub { $rint + -1 } ] ); + $bench->add_instance( 'x - -1' => eval qq[ sub { $rint - -1 } ] ); + $bench->add_instance( 'x ** 1' => eval qq[ sub { $rint ** 1 } ] ); + $bench->add_instance( 'x / 1' => eval qq[ sub { $rint / 1 } ] ); + } +} +else { + my $large = 97531; + my $small = 254; + $bench->add_instance( 'small: x + 1' => sub { $small + 1 } ); + $bench->add_instance( 'large: x + 1' => sub { $large + 1 } ); + $bench->add_instance( 'small: x - 1' => sub { $small - 1 } ); + $bench->add_instance( 'large: x - 1' => sub { $large - 1 } ); + if ( $ENV{ALL} ) { + $bench->add_instance( 'x * 1' => sub { $large * 1 } ); + $bench->add_instance( 'x + -1' => sub { $large + -1 } ); + $bench->add_instance( 'x - -1' => sub { $large - -1 } ); + $bench->add_instance( 'x ** 1' => sub { $large**1 } ); + $bench->add_instance( 'x / 1' => sub { $large / 1 } ); + } + +} + +*STDERR->print("Running benchmark\n"); +*STDERR->autoflush(1); +my $steps = 50; +*STDERR->print( q{[} . ( q[ ] x $steps ) . qq{]\r[} ); +for ( 1 .. 
$steps ) { + $bench->run_iterations( 10_000_000 / $steps ); + *STDERR->print("#"); +} +*STDERR->print("]\n"); +*STDERR->print("Generating plot\n"); +system( "gnuplot", "plot.gnuplot" ); +*STDERR->print("$imagefile\n"); +*STDERR->print("Collating histogram data\n"); +system( $^X, './aggregate.pl' ); +*STDERR->print("Generating histogram plot\n"); +system( 'gnuplot', 'aggregate_histogram.gnuplot' ); +*STDERR->print("$histfile\n"); + diff --git a/examples/math/plot.gnuplot b/examples/math/plot.gnuplot new file mode 100644 index 0000000..b3142ce --- /dev/null +++ b/examples/math/plot.gnuplot @@ -0,0 +1,49 @@ +binwidth=0.00001 + +bin(x,width)=width*floor(x/width) +clip(x)=binwidth*floor(x/binwidth) + +#set xrange [0.0006:0.001] + +set datafile separator ',' + +set style fill transparent solid 0.05 + +set key autotitle columnheader + +set grid ytics lc rgb "#777777" lt 0 back + +set boxwidth 0.9 relative + +set terminal pngcairo enhanced rounded size 1300,768 font "Droid Sans" + +#set terminal canvas standalone mousing size 800,600 rounded enhanced +#set output '/tmp/mathplot.html' +#set terminal dumb size 80,25 + +set output 'math.png' +plot for [c=1:10] \ + 'out.csv' using (clip(column(c))):(1.0) smooth freq with steps title columnhead(c); + +set output 'history.png' +#set logscale y 1.1 +#set samples 1000 +plot for [c=1:10] \ + 'out.csv' using c:c smooth cumulative with lines title columnhead(c); + +#set samples 100 +#plot for [c=1:10] \ +# '/tmp/out.csv' using c:(0.00005) smooth kdensity with steps title columnhead(c); + +#plot for [c=1:10] \ +# '/tmp/out.csv' using c:(1.0) smooth cnormal with steps title columnhead(c); + +#plot for [c=1:10] \ +# '/tmp/out.csv' using (bin(column(c),binwidth)):(1.0) smooth cnormal with steps title columnhead(c); + +#plot for [c=1:10] \ +# '/tmp/out.csv' using (bin(column(c),binwidth)):(1.0) smooth cumulative with steps title columnhead(c); + +#set style boxplot fraction 0.75 candlesticks nooutliers +#plot for [c=1:10] \ +# 
'/tmp/out.csv' using (c):c with boxplot title columnhead(c); diff --git a/examples/mkbatch/mkbatch.pl b/examples/mkbatch/mkbatch.pl new file mode 100644 index 0000000..78913bc --- /dev/null +++ b/examples/mkbatch/mkbatch.pl @@ -0,0 +1,59 @@ + +use strict; +use warnings; + +use Benchmark::CSV; +use Path::Tiny; +use FindBin; + +chdir $FindBin::Bin; + +my $outfile = path($FindBin::Bin)->child("out.csv"); +my $imagefile = path($FindBin::Bin)->child("mkbatch.png"); + +my $bench = Benchmark::CSV->new( + sample_size => 100, + output => $outfile, +); + +my $noop = sub { }; + +my $iter_a = join qq[\n], map { '$noop->();' } 0 .. 100; +my $code_a; +my $code_b; + +local $@; +eval <<"EOF" or die $@; +\$code_a = sub { + 1; + $iter_a + 1; +}; +1; +EOF + +eval <<"EOF" or die $@; +\$code_b = sub { + 1; + for ( 0 .. 100 ) { + \$noop->(); + } + 1; +}; +1 +EOF + +$bench->add_instance( 'unrolled' => $code_a ); +$bench->add_instance( 'loop' => $code_b ); +*STDERR->print("Running benchmark\n"); +*STDERR->autoflush(1); +my $steps = 50; +*STDERR->print( q{[} . ( q[ ] x $steps ) . qq{]\r[} ); +for ( 1 .. 
$steps ) { + $bench->run_iterations( 1_000_000 / $steps ); + *STDERR->print("#"); +} +*STDERR->print("]\n"); +*STDERR->print("Generating plot\n"); +system( "gnuplot", "plot.gnuplot" ); +*STDERR->print("$imagefile\n"); diff --git a/examples/mkbatch/plot.gnuplot b/examples/mkbatch/plot.gnuplot new file mode 100644 index 0000000..040ebaf --- /dev/null +++ b/examples/mkbatch/plot.gnuplot @@ -0,0 +1,42 @@ +binwidth=0.000002 + +bin(x,width)=width*floor(x/width) + +#set xrange [0.0006:0.001] + +set datafile separator ',' + +set style fill transparent solid 0.05 + +set key autotitle columnheader + +set grid ytics lc rgb "#777777" lt 0 back + +set boxwidth 0.9 relative + +set terminal pngcairo enhanced rounded size 1300,768 font "Droid Sans" +set output 'mkbatch.png' + +#set terminal canvas standalone mousing size 800,600 rounded enhanced +#set output '/tmp/mathplot.html' +#set terminal dumb size 80,25 + +plot for [c=1:7] \ + 'out.csv' using (bin(column(c),binwidth)):(1.0) smooth freq with steps title columnhead(c); + +#set samples 100 +#plot for [c=1:7] \ +# '/tmp/out.csv' using c:(0.00005) smooth kdensity with steps title columnhead(c); + +#plot for [c=1:7] \ +# '/tmp/out.csv' using c:(1.0) smooth cnormal with steps title columnhead(c); + +#plot for [c=1:7] \ +# '/tmp/out.csv' using (bin(column(c),binwidth)):(1.0) smooth cnormal with steps title columnhead(c); + +#plot for [c=1:7] \ +# '/tmp/out.csv' using (bin(column(c),binwidth)):(1.0) smooth cumulative with steps title columnhead(c); + +#set style boxplot fraction 0.75 candlesticks nooutliers +#plot for [c=1:7] \ +# '/tmp/out.csv' using (c):c with boxplot title columnhead(c); diff --git a/examples/shuffle/plot.gnuplot b/examples/shuffle/plot.gnuplot new file mode 100644 index 0000000..b787a29 --- /dev/null +++ b/examples/shuffle/plot.gnuplot @@ -0,0 +1,42 @@ +binwidth=0.000002 + +bin(x,width)=width*floor(x/width) + +#set xrange [0.0006:0.001] + +set datafile separator ',' + +set style fill transparent solid 0.05 + +set 
key autotitle columnheader + +set grid ytics lc rgb "#777777" lt 0 back + +set boxwidth 0.9 relative + +set terminal pngcairo enhanced rounded size 1300,768 font "Droid Sans" +set output 'shuffle.png' + +#set terminal canvas standalone mousing size 800,600 rounded enhanced +#set output '/tmp/mathplot.html' +#set terminal dumb size 80,25 + +plot for [c=1:7] \ + 'out.csv' using (bin(column(c),binwidth)):(1.0) smooth freq with steps title columnhead(c); + +#set samples 100 +#plot for [c=1:7] \ +# '/tmp/out.csv' using c:(0.00005) smooth kdensity with steps title columnhead(c); + +#plot for [c=1:7] \ +# '/tmp/out.csv' using c:(1.0) smooth cnormal with steps title columnhead(c); + +#plot for [c=1:7] \ +# '/tmp/out.csv' using (bin(column(c),binwidth)):(1.0) smooth cnormal with steps title columnhead(c); + +#plot for [c=1:7] \ +# '/tmp/out.csv' using (bin(column(c),binwidth)):(1.0) smooth cumulative with steps title columnhead(c); + +#set style boxplot fraction 0.75 candlesticks nooutliers +#plot for [c=1:7] \ +# '/tmp/out.csv' using (c):c with boxplot title columnhead(c); diff --git a/examples/shuffle/shuffle.pl b/examples/shuffle/shuffle.pl new file mode 100644 index 0000000..d41e72f --- /dev/null +++ b/examples/shuffle/shuffle.pl @@ -0,0 +1,55 @@ + +use strict; +use warnings; + +use Benchmark::CSV; +use Path::Tiny; +use List::Util qw( shuffle ); +use FindBin; + +chdir $FindBin::Bin; + +my $outfile = path($FindBin::Bin)->child("out.csv"); +my $imagefile = path($FindBin::Bin)->child("shuffle.png"); + +my $bench = Benchmark::CSV->new( + sample_size => 200, + output => $outfile, +); + +my @source_keys = map { $_ . ' of Spades', $_ . ' of Clubs', $_ . ' of Diamonds', $_ . 
' of Hearts' } + qw( Ace 2 3 4 5 6 7 8 9 10 Jack Queen King ); +my %source_hash = map { $_ => 1 } @source_keys; +my %source_hash_clean = map { $_ => 1 } @source_keys; + +$bench->add_instance( + 'shuffle' => sub { + my @out = shuffle(@source_keys); + 1; + } +); +$bench->add_instance( + 'hash trick' => sub { + { local $source_hash{_peturb} = 1; }; + my @out = keys %source_hash; + 1; + } +); +$bench->add_instance( + 'shuffle keys' => sub { + my @out = shuffle keys %source_hash_clean; + 1; + } +); +*STDERR->print("Running benchmark\n"); +*STDERR->autoflush(1); +my $steps = 50; +*STDERR->print( q{[} . ( q[ ] x $steps ) . qq{]\r[} ); +for ( 1 .. $steps ) { + $bench->run_iterations( 1_000_000 / $steps ); + *STDERR->print("#"); +} +*STDERR->print("]\n"); +*STDERR->print("Generating plot\n"); +system( "gnuplot", "plot.gnuplot" ); +*STDERR->print("$imagefile\n"); diff --git a/lib/Benchmark/CSV.pm b/lib/Benchmark/CSV.pm index c1c4942..488a726 100644 --- a/lib/Benchmark/CSV.pm +++ b/lib/Benchmark/CSV.pm @@ -7,12 +7,351 @@ package Benchmark::CSV; our $VERSION = '0.001000'; -# ABSTRACT: +use Path::Tiny; +use Carp qw( croak ); +use Time::HiRes qw( gettimeofday tv_interval clock_gettime ); +use IO::Handle; +use List::Util qw( shuffle ); + +# ABSTRACT: Report raw timing results in CSV-style format for advanced processing. # AUTHORITY -use Moo; +sub new { + my ( $self, @rest ) = @_; + return bless { ref $rest[0] ? 
%{ $rest[0] } : @rest }, $self; +} + +sub output_fh { + my $nargs = ( my ( $self, $value ) = @_ ); + if ( $nargs >= 2 ) { + croak 'Cant set output_fh after finalization' if $self->{finalized}; + return ( $self->{output_fh} = $value ); + } + return $self->{output_fh} if $self->{output_fh}; + if ( not $self->{output} ) { + return ( $self->{output_fh} = \*STDOUT ); + } + return ( $self->{output_fh} = Path::Tiny::path( $self->{output} )->openw ); +} + +sub sample_size { + my $nargs = ( my ( $self, $value ) = @_ ); + if ( $nargs >= 2 ) { + croak 'Cant set sample_size after finalization' if $self->{finalized}; + return ( $self->{sample_size} = $value ); + } + return $self->{sample_size} if defined $self->{sample_size}; + return ( $self->{sample_size} = 1 ); +} + +sub add_instance { + my $nargs = ( my ( $self, $name, $method ) = @_ ); + croak 'Too few arguments to ->add_instance( name => sub { })' if $nargs < 3; + croak 'Cant add instances after execution/finalization' if $self->{finalized}; + $self->{instances} ||= {}; + croak "Cant add instance $name more than once" if exists $self->{instances}->{$name}; + $self->{instances}->{$name} = $method; + return; +} + +my $timing_methods = { + ## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars); + 'hires_wall' => { + start => q[my $start = [ gettimeofday ]], + stop => q[tv_interval( $start, [ gettimeofday ])], + }, + + # This one is hard to use as a default due to linux things. 
+ 'hires_cputime_process' => { + + # bits/time.h + # CLOCK_PROCESS_CPUTIME_ID = 2 + start => q[my $start = clock_gettime(2)], + stop => q[clock_gettime(2) - $start], + }, + + # These are all bad because they're very imprecise :( The heredocs below are single-quoted, so $name is written unescaped: \$name would reach the eval as a reference and key the result row by "SCALAR(0x...)" instead of the instance name. + 'times' => { + start => q[my @start = times], + return => <<'EOF', + my @stop = times; + return ( $name, sprintf '%f', ( $stop[0]+$stop[1] ) - ($start[0]+$start[1]) ) +EOF + }, + 'times_user' => { + start => q[my @start = times], + return => <<'EOF', + my @stop = times; + return ( $name, sprintf '%f' , ( $stop[0] - $start[0])) +EOF + }, + 'times_system' => { + start => q[my @start = times], + return => <<'EOF', + my @stop = times; + return ( $name, sprintf '%f' , ( $stop[1] - $start[1])); +EOF + }, +}; + +sub _compile_timer { + ## no critic (Variables::ProhibitUnusedVarsStricter) + my ( $self, $name, $code, $sample_size ) = @_; + ## no critic (ValuesAndExpressions::RequireInterpolationOfMetachars); + my $run_one = q[ $code->(); ]; + my $run_batch = join qq[\n], map { $run_one } 1 .. 
$sample_size; + $self->{timing_method} ||= 'hires_wall'; + my ( $starter, $stopper, $return ) = map { $timing_methods->{ $self->{timing_method} }->{$_} } qw( start stop return ); + $return ||= qq[ return ( \$name, sprintf '%f', $stopper ) ]; + + my $sub; + my $build_sub = <<"EOF"; + \$sub = sub { + $starter; + $run_batch; + $return; + }; + 1 +EOF + local $@ = undef; + ## no critic (BuiltinFunctions::ProhibitStringyEval, Lax::ProhibitStringyEval::ExceptForRequire) + croak $@ unless eval $build_sub; + return $sub; +} + +sub _write_header { + my ($self) = @_; + return if $self->{headers_written}; + $self->output_fh->printf( "%s\n", join q[,], sort keys %{ $self->{instances} } ); + $self->{headers_written} = 1; + $self->{finalized} = 1; + return; +} -no Moo; +sub _write_result { + my ( $self, $result ) = @_; + $self->output_fh->printf( "%s\n", join q[,], map { $result->{$_} } sort keys %{$result} ); + return; +} + +sub run_iterations { + my $nargs = ( my ( $self, $count ) = @_ ); + croak 'Arguments missing to ->run_iterations( num )' if $nargs < 2; + $self->_write_header; + my $sample_size = $self->sample_size; + my $timers = {}; + for my $instance ( keys %{ $self->{instances} } ) { + $timers->{$instance} = $self->_compile_timer( $instance, $self->{instances}->{$instance}, $sample_size ); + } + my @timer_names = keys %{$timers}; + for ( 1 .. ( $count / $sample_size ) ) { + $self->_write_result( +{ map { $timers->{$_}->() } shuffle @timer_names } ); + } + $self->output_fh->flush; + return; +} 1; + +=head1 SYNOPSIS + + use Benchmark::CSV; + + my $benchmark = Benchmark::CSV->new( + output => './test.csv', + sample_size => 10, + ); + + $benchmark->add_instance( 'method_a' => sub {}); + $benchmark->add_instance( 'method_b' => sub {}); + + $benchmark->run_iterations(100_000); + +=method C<add_instance> + +Add a test block. + + ->add_instance( name => sub { } ); + +B<NOTE:> You can only add test instances prior to executing the tests. 
+ +After executing tests, the number of columns and the column headings become C<finalized>. + +This is because of how the CSV file is written in parallel with the test batches. + +CSV is written headers first, top to bottom, one row at a time. + +So adding a new column is impossible after the headers have been written without starting over. + +=method C<new> + +Create a benchmark object. + + my $instance = Benchmark::CSV->new( \%hash ); + my $instance = Benchmark::CSV->new( %hash ); + + %hash = ( + sample_size => # number of times to call each sub in a sample + output => # A file path to write to + output_fh => # An output filehandle to write to + ); + +=method C<sample_size> + +The number of times to call each sub in a "Sample". + +A sample is a block of timed code. + +For instance: + + ->sample_size(4); + ->add_instance( x => $y ); + ->run_iterations(40); + +This will create a timer block similar to below. + + my $start = time(); + # Unrolled, because benchmarking indicated unrolling was faster. + $y->(); + $y->(); + $y->(); + $y->(); + return time() - $start; + +That block will then be called 10 times ( 40 total code executions batched into 10 groups of 4 ) +and return 10 time values. + +=head3 get:C<sample_size> + + my $size = $bench->sample_size; + +Value will default to 1 if not passed during construction. + +=head3 set:C<sample_size> + + $bench->sample_size(10); + +Can be performed at any time prior to, but not after, running tests. + +=method C<output_fh> + +An output C<filehandle> to write very sloppy C<CSV> data to. + +Results will be in Columns, sorted by column name alphabetically. + +C<output_fh> defaults to C<*STDOUT>, or opens a file passed to the constructor as C<output> for writing. + +=head3 get:C<output_fh> + + my $fh = $bench->output_fh; + +Either *STDOUT or an opened C<filehandle>. + +=head3 set:C<output_fh> + + $bench->output_fh( \*STDERR ); + +Can be set at any time prior to, but not after, running tests. + +=method C<run_iterations> + +Executes the attached tests C<count> times in batches of L<< C<sample_size>|/sample_size >>. 
+ + ->run_iterations( 10_000_000 ); + +Because of how it works, simply spooling results at the bottom of the data file, you can call this method +multiple times as necessary and inject more results. + +For instance, this could be used to give a progress report. + + *STDOUT->autoflush(1); + print "[__________]\r["; + for ( 1 .. 10 ) { + $bench->run_iterations( 1_000_000 ); + print "#"; + } + print "]\n"; + + +This is also how you can do timed batches: + + my $start = [gettimeofday]; + # Just execute as much as possible until 10 seconds of wallclock pass. + while( tv_interval( $start, [ gettimeofday ]) < 10 ) { + $bench->run_iterations( 1_000 ); + } + +=begin :blag + +=head1 RATIONALE. + +I've long found all the other bench-marking utilities well meaning, but easily confusing. + +My biggest misgiving is that they give you one or two values which they have decided are "the time" your code took, +whether it's an average, a median, or some other algorithm, ( Such as in C ), they all amount to basically giving +you a data point, which you have to take for granted. + +That data point may also change wildly between test runs due to computer load or other factors. + +Essentially, the flaw as I see it, is trying to convey what is essentially a I of results as a single point. + +They also run each test sequentially, as in: + + start testing -> + + start test one -> + + <-- end test one + + record data + + start test two -> + + <-- end test two + + record data + + <-- stop testing. + +And that strikes me as incredibly prone to the batches getting different results due to CPU loading variations, +such that any benchmark run this way is unreliable on anything other than a perfectly idle processor +without so much as an C subsystem stealing CPU time, and with your kernel delivering IO +perfectly the whole time. + +And the final numbers don't really seem to take that into consideration. 
+ +C at least gives you variation data, but it's rather hard to compare and visualize the results it gives to gain +meaningful insight. + +So, I looked to modeling the data differently, and happened to accidentally throw some hand-collected benchmark data into a +Google Spreadsheet Histogram plot, and found it hugely enlightening on what was really going on. + +One recurring observation I noticed is code run-time seems to have a very lop-sided distribution: + + + | ++ + | |++ + | | | + | | | + | | | + | | +++ + | | | + | ++ ++++++++ + | + +++++++++++++++++++++++ + 0 +------------------------------------- + 0 + + +Which suggests to me, that unlike many things people usually use statistics for, +where you have a bunch of things evenly on both sides of the mode, code has an I minimum run time, +which you might see if your system has all factors in "ideal" conditions, and it has a closely following I but +I run time, which I imagine you see because the system can't deliver every cycle of code +in perfect situations every time, even the kernel is selfish and says "Well, if I let your code have exactly 100% CPU for as +long as you wanted it, I doubt even kernel space would be able to do anything till you were quite done". +So observing the minimum time C the median seems to me useful for comparing algorithm efficiency. + +Observing the maximums is useful too, however, those values trend towards being less useful, as they're likely to be impacted by +CPU randomness slowing things down. 
+ +=end :blag diff --git a/maint/perlcritic.rc.gen.pl b/maint/perlcritic.rc.gen.pl index 51f5d5c..4bbbe84 100644 --- a/maint/perlcritic.rc.gen.pl +++ b/maint/perlcritic.rc.gen.pl @@ -18,7 +18,7 @@ my $bundle = create_bundle('Example::Author::KENTNL'); $bundle->configure; -my @stopwords = (qw()); +my @stopwords = (qw( CSV )); for my $wordlist (@stopwords) { $bundle->add_or_append_policy_field( 'Documentation::PodSpelling' => ( 'stop_words' => $wordlist ) ); } @@ -26,7 +26,8 @@ #$bundle->add_or_append_policy_field( # 'Subroutines::ProhibitCallsToUndeclaredSubs' => ( 'exempt_subs' => 'String::Formatter::str_rf' ), ); -#$bundle->remove_policy('ErrorHandling::RequireUseOfExceptions'); +$bundle->remove_policy('ErrorHandling::RequireUseOfExceptions'); + #$bundle->remove_policy('CodeLayout::RequireUseUTF8'); #$bundle->remove_policy('ErrorHandling::RequireCarping'); #$bundle->remove_policy('NamingConventions::Capitalization'); diff --git a/misc/perlcritic.deps b/misc/perlcritic.deps index 850f958..7152ec0 100644 --- a/misc/perlcritic.deps +++ b/misc/perlcritic.deps @@ -1,3 +1,4 @@ +Perl::Critic::Policy::Documentation::PodSpelling~0 Perl::Critic::Policy::Lax::RequireExplicitPackage::ExceptForPragmata~0 Perl::Critic::Policy::Variables::ProhibitUnusedVarsStricter~0 Perl::Critic::ProfileCompiler::Bundle::Bangs~0 diff --git a/perlcritic.rc b/perlcritic.rc index d4bcd64..16bc094 100644 --- a/perlcritic.rc +++ b/perlcritic.rc @@ -126,6 +126,7 @@ allow_includes = 1 [Documentation::PodSpelling] spell_command = aspell list --lang en_US +stop_words = CSV [Documentation::ProhibitAdjacentLinks] @@ -169,7 +170,7 @@ spell_command = aspell list --lang en_US [ErrorHandling::RequireCheckingReturnValueOfEval] -[ErrorHandling::RequireUseOfExceptions] +[-ErrorHandling::RequireUseOfExceptions] [InputOutput::ProhibitBacktickOperators] diff --git a/t/basic.t b/t/basic.t new file mode 100644 index 0000000..973f596 --- /dev/null +++ b/t/basic.t @@ -0,0 +1,36 @@ + +use strict; +use warnings; + +use 
Test::More; + +# ABSTRACT: Test basic performance + +use Benchmark::CSV; +use Path::Tiny; + +my $tdir = Path::Tiny->tempdir; + +my $csv = $tdir->child('out.csv'); + +my $bench = Benchmark::CSV->new( + sample_size => 100, + output => $csv, +); + +my $x = 946744; +my $y = 7; + +$bench->add_instance( 'x + y' => sub { $x + $y } ); +$bench->add_instance( 'x - y' => sub { $x - $y } ); + +$bench->run_iterations(100_000); + +my $lines = [ $csv->lines( { chomp => 1 } ) ]; +is( $lines->[0], 'x + y,x - y', "Header in place" ); +like( $lines->[1], qr/\A\d+[.]\d+,\d+[.]\d+/msx, "Second line matches regex" ); +like( $lines->[-1], qr/\A\d+[.]\d+,\d+[.]\d+/msx, "Last line matches regex" ); +is( scalar @{$lines}, 1001, "Has 1 line per sample + header" ); + +done_testing; + diff --git a/t/basic_hashconstruct.t b/t/basic_hashconstruct.t new file mode 100644 index 0000000..bd89342 --- /dev/null +++ b/t/basic_hashconstruct.t @@ -0,0 +1,31 @@ + +use strict; +use warnings; + +use Test::More; + +# ABSTRACT: Test basic performance + +use Benchmark::CSV; +use Path::Tiny; + +my $tdir = Path::Tiny->tempdir; + +my $csv = $tdir->child('out.csv'); + +local $@; +ok( + eval { + my $bench = Benchmark::CSV->new( + { + sample_size => 100, + output => $csv, + } + ); + 1; + }, + "Construct with a hashref" +) or diag $@; + +done_testing; + diff --git a/t/output_fh_set.t b/t/output_fh_set.t new file mode 100644 index 0000000..3877246 --- /dev/null +++ b/t/output_fh_set.t @@ -0,0 +1,48 @@ + +use strict; +use warnings; + +use Test::More; + +# ABSTRACT: Test basic performance + +use Benchmark::CSV; +use Path::Tiny; + +my $tdir = Path::Tiny->tempdir; + +my $csv = $tdir->child('out.csv'); + +{ + my $bench = Benchmark::CSV->new( { sample_size => 100, } ); + $bench->output_fh( \*STDERR ); + + pass("Set output did not fail"); +} +{ + my $bench = Benchmark::CSV->new( { sample_size => 100, } ); + ok( defined $bench->output_fh(), 'got a defined fh when one wasnt passed and no output set' ); +} +{ + my $string = ""; 
+ open my $fh, '>', \$string; + my $bench = Benchmark::CSV->new( { sample_size => 100, output_fh => $fh } ); + ok( defined $bench->output_fh(), 'got a defined fh' ); + $bench->output_fh->print("Test"); + is( $string, "Test", "String written to" ); +} +{ + my $bench = Benchmark::CSV->new( { sample_size => 100, } ); + ok( defined $bench->output_fh(), 'got a defined fh when one wasnt passed and no output set' ); + $bench->{finalized} = 1; + local $@; + my $err = eval { + $bench->output_fh( \*STDERR ); + 1; + }; + isnt( $err, 1, "Setting output_fh after finalized should fail" ); + +} + +done_testing; + diff --git a/t/sample_size_set.t b/t/sample_size_set.t new file mode 100644 index 0000000..2b2df13 --- /dev/null +++ b/t/sample_size_set.t @@ -0,0 +1,42 @@ + +use strict; +use warnings; + +use Test::More; + +# ABSTRACT: Test basic performance + +use Benchmark::CSV; +use Path::Tiny; + +my $tdir = Path::Tiny->tempdir; + +my $csv = $tdir->child('out.csv'); + +{ + my $bench = Benchmark::CSV->new( { sample_size => 100, } ); + + $bench->sample_size(10); + + pass("Set sample_size did not fail"); +} +{ + my $bench = Benchmark::CSV->new( {} ); + my $ss = $bench->sample_size(); + is( $ss, '1', 'default sample size' ); +} +{ + local $@; + my $bench = Benchmark::CSV->new( { sample_size => 100, } ); + $bench->{finalized} = 1; + + my $er = eval { + $bench->sample_size(10); + 1; + }; + + isnt( $er, 1, "Setting sample size once finalised should bail" ); + +} +done_testing; + diff --git a/weaver.ini b/weaver.ini index e6f2b37..9c4687d 100644 --- a/weaver.ini +++ b/weaver.ini @@ -10,6 +10,8 @@ [Generic / DESCRIPTION] [Generic / OVERVIEW] +[Region / blag] + ;[Generic / METHODS] ;[Collect / METHODS] [Collect / METHODS]