Permalink
Browse files

import Memoize 0.46 from CPAN

git-cpan-module:   Memoize
git-cpan-version:  0.46
git-cpan-authorid: MJD
git-cpan-file:     authors/id/M/MJ/MJD/Memoize-0.46.tar.gz
  • Loading branch information...
1 parent a7fb95f commit 84a67d2c0e665bcaba711891ab5106278fc7a095 @mjdominus committed with schwern Sep 8, 1998
Showing with 402 additions and 99 deletions.
  1. +2 −0 MANIFEST
  2. +1 −1 Makefile.PL
  3. +57 −16 Memoize.pm
  4. +61 −0 Memoize/Storable.pm
  5. +171 −19 README
  6. +35 −63 TODO
  7. +6 −0 t/speed.t
  8. +69 −0 t/tie_storable.t
View
2 MANIFEST
@@ -4,6 +4,7 @@ MANIFEST
Memoize.pm
Makefile.PL
Memoize/SDBM_File.pm
+Memoize/Storable.pm
t/correctness.t
t/array.t
t/speed.t
@@ -13,6 +14,7 @@ t/unmemoize.t
t/tie.t
t/tiefeatures.t
t/tie_gdbm.t
+t/tie_storable.t
demo.pl
demo2.pl
TODO
View
2 Makefile.PL
@@ -1,7 +1,7 @@
use ExtUtils::MakeMaker;
WriteMakefile(
NAME => 'Memoize',
- VERSION => '0.45',
+ VERSION => '0.46',
# 'linkext' => {LINKTYPE => ''},
'dist' => {COMPRESS => 'gzip', SUFFIX => 'gz'},
);
View
73 Memoize.pm
@@ -8,10 +8,10 @@
# same terms as Perl itself. If in doubt,
# write to mjd-perl-memoize@plover.com for a license.
#
-# Version 0.45 beta $Revision: 1.8 $ $Date: 1998/09/05 03:43:53 $
+# Version 0.46 beta $Revision: 1.9 $ $Date: 1998/09/07 19:42:52 $
package Memoize;
-$VERSION = '0.45';
+$VERSION = '0.46';
=head1 NAME
@@ -449,21 +449,48 @@ and shorter every time you call C<main>.
=back
-=head1 MY BUGS
+=head1 PERSISTENT CACHE SUPPORT
-Needs a better test suite, especially for the tied stuff.
-That is why the version number is 0.45 instead of 0.50.
+You can tie the cache tables to any sort of tied hash that you want
+to, as long as it supports C<TIEHASH>, C<FETCH>, C<STORE>, and
+C<EXISTS>. For example,
+
+ memoize 'function', SCALAR_CACHE =>
+ [TIE, GDBM_File, $filename, O_RDWR|O_CREAT, 0666];
+
+works just fine. For some storage methods, you need a little glue.
+
+C<SDBM_File> doesn't supply an C<EXISTS> method, so included in this
+package is a glue module called C<Memoize::SDBM_File> which does
+provide one. Use this instead of plain C<SDBM_File> to store your
+cache table on disk in an C<SDBM_File> database:
+
+ memoize 'function',
+ SCALAR_CACHE =>
+ [TIE, Memoize::SDBM_File, $filename, O_RDWR|O_CREAT, 0666];
+
+C<Storable> isn't a tied hash class at all. You can use it to store a
+hash to disk and retrieve it again, but you can't modify the hash while
+it's on the disk. So if you want to store your cache table in a
+C<Storable> database, use C<Memoize::Storable>, which puts a hashlike
+front-end onto C<Storable>. The hash table is actually kept in
+memory, and is loaded from your C<Storable> file at the time you
+memoize the function, and stored back at the time you unmemoize the
+function:
+
+ memoize 'function',
+ SCALAR_CACHE => [TIE, Memoize::Storable, $filename];
-=head1 OTHER PEOPLE'S BUGS
+ memoize 'function',
+ SCALAR_CACHE => [TIE, Memoize::Storable, $filename, 'nstore'];
-The tied hash class you use for storing your cache table must support
-the following methods: C<tiehash>, C<fetch>, C<store>, C<exists>. In
-particular, you can't use C<SDBM_File> because it doesn't have
-C<exists>. This package contains a glue module, called
-C<Memoize::SDBM_File>, which provides an C<exists> method so that you
-can use C<SDBM_File> with C<Memoize>. Just replace C<SDBM_File> with
-C<Memoize::SDBM_File> in your call to C<memoize>; everything else is
-the same.
+Include the `nstore' option to have the C<Storable> database written
+in `network order'. (See L<Storable> for more details about this.)
+
+=head1 MY BUGS
+
+Needs a better test suite, especially for the tied stuff.
+That is why the version number is 0.46 instead of 0.50.
=head1 MAILING LIST
@@ -547,7 +574,7 @@ sub memoize {
$install_name = $uppack . '::' . $install_name
unless $install_name =~ /::/;
no strict;
- local($) = 0; # ``Subroutine $install_name redefined at ...''
+ local($^W) = 0; # ``Subroutine $install_name redefined at ...''
*{$install_name} = $wrapper; # Install memoized version
}
@@ -742,11 +769,25 @@ sub unmemoize {
my $name = $tabent->{NAME};
if (defined $name) {
no strict;
- local($) = 0; # ``Subroutine $install_name redefined at ...''
+ local($^W) = 0; # ``Subroutine $install_name redefined at ...''
*{$name} = $tabent->{UNMEMOIZED}; # Replace with original function
}
undef $memotable{$revmemotable{$cref}};
undef $revmemotable{$cref};
+
+ # This removes the last reference to the (possibly tied) memo tables
+ # my ($old_function, $memotabs) = @{$tabent}{'UNMEMOIZED','MEMOS'};
+ # undef $tabent;
+
+# # Untie the memo tables if they were tied.
+# my $i;
+# for $i (0,1) {
+# if (tied %{$memotabs->[$i]}) {
+# warn "Untying hash #$i\n";
+# untie %{$memotabs->[$i]};
+# }
+# }
+
$tabent->{UNMEMOIZED};
}
View
61 Memoize/Storable.pm
@@ -0,0 +1,61 @@
+
+package Memoize::Storable;
+use Storable ();
+$Verbose = 0;
+
+# Constructor called by tie().  Arguments are the package name, the name
+# of the Storable file that backs the cache, and any number of option
+# names (DESTROY consults the 'nstore' option).  If the file already
+# exists its contents are loaded with Storable::retrieve; otherwise the
+# table starts out empty.  Returns the blessed object behind the tie.
+sub TIEHASH {
+  require Carp if $Verbose;
+  my $package = shift;
+  my $filename = shift;
+  my $truehash = (-e $filename) ? Storable::retrieve($filename) : {};
+  my %options;
+  print STDERR "Memoize::Storable::TIEHASH($filename, @_)\n" if $Verbose;
+  # Give every requested option a true value.  Assigning an empty list to
+  # the slice would leave each value undefined, so a truth test on an
+  # option such as 'nstore' could never succeed.
+  @options{@_} = (1) x @_;
+  my $self =
+    {FILENAME => $filename,
+     H => $truehash,
+     OPTIONS => \%options
+    };
+  bless $self => $package;
+}
+
+# Tied-hash STORE: save a value under a key in the in-memory table (the
+# H member of the object).  Returns the stored value.
+sub STORE {
+  require Carp if $Verbose;
+  my ($self, @args) = @_;
+  print STDERR "Memoize::Storable::STORE(@args)\n" if $Verbose;
+  my ($key, $value) = @args;
+  return $self->{H}{$key} = $value;
+}
+
+# Tied-hash FETCH: look up a key in the in-memory table and return the
+# value found there (undef when the key is absent).
+sub FETCH {
+  require Carp if $Verbose;
+  my ($self, @args) = @_;
+  print STDERR "Memoize::Storable::FETCH(@args)\n" if $Verbose;
+  my $key = $args[0];
+  return $self->{H}{$key};
+}
+
+# Tied-hash EXISTS: report whether a key is present in the in-memory
+# table, regardless of the value stored under it.
+sub EXISTS {
+  require Carp if $Verbose;
+  my ($self, @args) = @_;
+  print STDERR "Memoize::Storable::EXISTS(@args)\n" if $Verbose;
+  my $key = $args[0];
+  return exists $self->{H}{$key};
+}
+
+# Destructor: write the in-memory table back to the file named when the
+# hash was tied.  Storable::nstore is used when the 'nstore' option was
+# requested, and Storable::store otherwise.
+sub DESTROY {
+  require Carp if $Verbose;
+  my $self= shift;
+  print STDERR "Memoize::Storable::DESTROY(@_)\n" if $Verbose;
+  # Options are recorded as hash keys, so test for the key's presence
+  # rather than its truth; the option is then honored whatever value it
+  # was stored with.
+  if (exists $self->{OPTIONS}{'nstore'}) {
+    Storable::nstore($self->{H}, $self->{FILENAME});
+  } else {
+    Storable::store($self->{H}, $self->{FILENAME});
+  }
+}
+
+# Tied-hash FIRSTKEY: always answers with the same fixed placeholder
+# string; the real table in the H member is not consulted.
+sub FIRSTKEY {
+  return 'Fake hash from Memoize::Storable';
+}
+
+# Tied-hash NEXTKEY: always answers undef, so no key ever follows the
+# placeholder returned by FIRSTKEY.
+sub NEXTKEY {
+  return undef;
+}
+1;
View
190 README
@@ -1,13 +1,22 @@
Name: Memoize
What: Transparently speed up functions by caching return values.
-Version: 0.45
+Version: 0.46
Author: Mark-Jason Dominus (mjd-perl-memoize@plover.com)
################################################################
What's new since the previous release:
+Now has an interface to `Storable'. This wasn't formerly possible,
+because the base package can only store caches via modules that
+present a tied hash interface, and `Storable' doesn't. Solution:
+Memoize::Storable is a tied hash interface to `Storable'.
+
+################################################################
+
+What's new since 0.45:
+
Storage of cached function return values in a static file is now
tentatively supported. `memoize' now accepts new options SCALAR_CACHE
and LIST_CACHE to specify the destination and protocol for saving
@@ -44,7 +53,6 @@ do so.
################################################################
-
=head1 NAME
Memoize - Make your functions faster by trading space for time
@@ -160,13 +168,14 @@ There are some optional options you can pass to C<memoize> to change
the way it behaves a little. To supply options, invoke C<memoize>
like this:
- memoize(function, TODISK => filename,
- NORMALIZER => function,
- INSTALL => newname
+ memoize(function, NORMALIZER => function,
+ INSTALL => newname,
+ SCALAR_CACHE => option,
+ LIST_CACHE => option
);
-Each of these three options is optional; you can include some, all, or
-none of them.
+Each of these options is optional; you can include some, all, or none
+of them.
=head2 INSTALL
@@ -180,6 +189,9 @@ installs the memoized version of C<fib> as C<fastfib>; without the
C<INSTALL> option it would have replaced the old C<fib> with the
memoized version.
+To prevent C<memoize> from installing the memoized version anywhere, use
+C<INSTALL =E<gt> undef>.
+
=head2 NORMALIZER
Suppose your function looks like this:
@@ -233,7 +245,7 @@ You would tell C<Memoize> to use this normalizer this way:
C<memoize> knows that if the normalized version of the arguments is
the same for two argument lists, then it can safely look up the value
that it computed for one argument list and return it as the result of
-calling the function with the other argmuent list, even if the
+calling the function with the other argument list, even if the
argument lists look different.
The default normalizer just concatenates the arguments with C<$;> in
@@ -255,18 +267,135 @@ would in scalar context, you can have the normalizer function select
its behavior based on the results of C<wantarray>. Even if called in
a list context, a normalizer should still return a single string.
-=head2 TODISK
+=head2 C<SCALAR_CACHE>, C<LIST_CACHE>
+
+Normally, C<Memoize> caches your function's return values into an
+ordinary Perl hash variable. However, you might like to have the
+values cached on the disk, so that they persist from one run of your
+program to the next, or you might like to associate some other
+interesting semantics with the cached values.
+
+There's a slight complication under the hood of C<Memoize>: There are
+actually I<two> caches, one for scalar values and one for list values.
+When your function is called in scalar context, its return value is
+cached in one hash, and when your function is called in list context,
+its value is cached in the other hash. You can control the caching
+behavior of both contexts independently with these options.
+
+The argument to C<LIST_CACHE> or C<SCALAR_CACHE> must either be one of
+the following four strings:
+
+ MEMORY
+ TIE
+ FAULT
+ MERGE
+
+or else it must be a reference to a list whose first element is one of
+these four strings, such as C<[TIE, arguments...]>.
+
+=over 4
+
+=item C<MEMORY>
+
+C<MEMORY> means that return values from the function will be cached in
+an ordinary Perl hash variable. The hash variable will not persist
+after the program exits. This is the default.
+
+=item C<TIE>
+
+C<TIE> means that the function's return values will be cached in a
+tied hash. A tied hash can have any semantics at all. It is
+typically tied to an on-disk database, so that cached values are
+stored in the database and retrieved from it again when needed, and
+the disk file typically persists after your program has exited.
+
+If C<TIE> is specified as the first element of a list, the remaining
+list elements are taken as arguments to the C<tie> call that sets up
+the tied hash. For example,
+
+ SCALAR_CACHE => [TIE, DB_File, $filename, O_RDWR | O_CREAT, 0666]
+
+says to tie the hash into the C<DB_File> package, and to pass the
+C<$filename>, C<O_RDWR | O_CREAT>, and C<0666> arguments to the C<tie>
+call. This has the effect of storing the cache in a C<DB_File>
+database whose name is in C<$filename>.
+
+Other typical uses of C<TIE>:
+
+ LIST_CACHE => [TIE, GDBM_File, $filename, O_RDWR | O_CREAT, 0666]
+ SCALAR_CACHE => [TIE, MLDBM, DB_File, $filename, O_RDWR|O_CREAT, 0666]
+ LIST_CACHE => [TIE, My_Package, $tablename, $key_field, $val_field]
+
+This last might tie the cache hash to a package that you wrote
+yourself that stores the cache in a SQL-accessible database.
+A useful use of this feature: You can construct a batch program that
+runs in the background and populates the memo table, and then when you
+come to run your real program the memoized function will be
+screamingly fast because all its results have been precomputed.
-C<TODISK> means that the memo table should be saved to disk so that it
-will persist between invocations of your program. If you use this
-option, future runs of your program will get immediate benefit from
-the results computed by earlier runs. A useful use of this feature:
-You can construct a batch program that runs in the background and
-populates the memo table, and then when you come to run your real
-program the memoized function will be screamingly fast because al lits
-results have been precomputed. Or you would be able to do this, if
-TODISK were implemented, which it presently isn't. But it will be.
-Some day.
+=item C<FAULT>
+
+C<FAULT> means that you never expect to call the function in scalar
+(or list) context, and that if C<Memoize> detects such a call, it
+should abort the program. The error message is one of
+
+ `foo' function called in forbidden list context at line ...
+ `foo' function called in forbidden scalar context at line ...
+
+=item C<MERGE>
+
+C<MERGE> normally means the function does not distinguish between list
+and scalar context, and that return values in both contexts should be
+stored together. C<LIST_CACHE =E<gt> MERGE> means that list context
+return values should be stored in the same hash that is used for
+scalar context returns, and C<SCALAR_CACHE =E<gt> MERGE> means the
+same, mutatis mutandis. It is an error to specify C<MERGE> for both,
+but it probably does something useful.
+
+Consider this function:
+
+ sub pi { 3; }
+
+Normally, the following code will result in two calls to C<pi>:
+
+ $x = pi();
+ ($y) = pi();
+ $z = pi();
+
+The first call caches the value C<3> in the scalar cache; the second
+caches the list C<(3)> in the list cache. The third call doesn't call
+the real C<pi> function; it gets the value from the scalar cache.
+
+Obviously, the second call to C<pi> is a waste of time, and storing
+its return value is a waste of space. Specifying C<LIST_CACHE
+=E<gt> MERGE> will make C<memoize> use the same cache for scalar and
+list context return values, so that the second call uses the scalar
+cache that was populated by the first call. C<pi> ends up being
+called only once, and both subsequent calls return C<3> from the
+cache, regardless of the calling context.
+
+Another use for C<MERGE> is when you want both kinds of return values
+stored in the same disk file; this saves you from having to deal with
+two disk files instead of one. You can use a normalizer function to
+keep the two sets of return values separate. For example:
+
+ memoize 'myfunc',
+ NORMALIZER => 'n',
+ SCALAR_CACHE => [TIE, MLDBM, DB_File, $filename, ...],
+ LIST_CACHE => MERGE,
+ ;
+
+ sub n {
+ my $context = wantarray() ? 'L' : 'S';
+ # ... now compute the hash key from the arguments ...
+ $hashkey = "$context:$hashkey";
+ }
+
+This normalizer function will store scalar context return values in
+the disk file under keys that begin with C<S:>, and list context
+return values under keys that begin with C<L:>.
+
+=back
=head1 OTHER FUNCTION
@@ -360,6 +489,26 @@ and shorter every time you call C<main>.
=back
+=head1 MY BUGS
+
+Needs a better test suite, especially for the tied stuff.
+That is why the version number is 0.46 instead of 0.50.
+
+=head1 OTHER PEOPLE'S BUGS
+
+The tied hash class you use for storing your cache table must support
+the following methods: C<tiehash>, C<fetch>, C<store>, C<exists>. In
+particular, you can't use C<SDBM_File> because it doesn't have
+C<exists>. This package contains a glue module, called
+C<Memoize::SDBM_File>, which provides an C<exists> method so that you
+can use C<SDBM_File> with C<Memoize>. Just replace C<SDBM_File> with
+C<Memoize::SDBM_File> in your call to C<memoize>; everything else is
+the same.
+
+=head1 MAILING LIST
+
+To join a very low-traffic mailing list for announcements about
+C<Memoize>, send an empty note to C<mjd-perl-memoize-request@plover.com>.
=head1 AUTHOR
@@ -376,5 +525,8 @@ for news and upgrades.
=end html
+To join a mailing list for announcements about C<Memoize>, send an
+empty message to C<mjd-perl-memoize-request@plover.com>.
+
=cut
View
98 TODO
@@ -1,4 +1,4 @@
-# Version 0.05 alpha $Revision: 1.3 $ $Date: 1998/09/05 03:44:04 $
+# Version 0.05 alpha $Revision: 1.4 $ $Date: 1998/09/07 19:48:43 $
=head1 TO DO
@@ -23,10 +23,7 @@ large effect on the performance of C<main>. But if there was a big
difference, you would know that C<suba> or C<subb> was a good
candidate for optimization if you needed to make C<main> go faster.
-=item *
-
-Maybe a tied-hash interface to the memo-table, which a hook to
- automatically populate an entry if no value is there yet?
+Done.
=item *
@@ -39,13 +36,17 @@ We could even bless the new function reference so that it could have
accessor methods for getting to the original function, the options,
the memo table, etc.
+Naah.
+
=item *
The TODISK feature is not ready yet. It will have to be rather
complicated, providing options for which disk method to use (GDBM?
DB_File? Flat file? Storable? User-supplied?) and which stringizing
method to use (FreezeThaw? Marshal? User-supplied?)
+Done!
+
=item *
Maybe an option for automatic expiration of cache values? (`After one
@@ -76,9 +77,13 @@ reason not to do this, but I can't remember what it was.
Add more array value tests to the test suite.
+Does it need more now?
+
=item *
-Fix that `Subroutine u rededined ... line 484' message.
+Fix that `Subroutine u redefined ... line 484' message.
+
+Fixed, I think.
=item *
@@ -89,54 +94,9 @@ Get rid of any remaining *{$ref}{CODE} or similar magic hashes.
There should be an option to dump out the memoized values or to
otherwise traverse them.
-=item *
-
-There was probably some other stuff that I forgot.
-
-
-=item *
-
-Here's the preliminary interface spec for the C<TODISK> option:
-`memoize' takes options named C<SCALAR_CONTEXT> and C<LIST_CONTEXT>.
-Legal values are
-
- MEMORY
- TIE
- FAULT
- MERGE
-
-or a reference to a list whose first element is one of these. The
-default for both is MEMORY, which means that Perl's builtin hashes are
-used, the way they are now. FAULT means that the function should
-never be called in scalar/list context, and that Memoize should croak
-if it is. TIE means that the hash will be tied; it's usaully written
-as
-
- [TIE, packagename, argument-list]
-
-which specifies the package name and arguments for the tie. Memoize
-will load the package if appropriate. Thus
-
- [TIE, Storable, filename, ...]
- [TIE, DB_File, filename, flags, mode, ...]
- [TIE, MLDBM, DB_File, ... ]
-
-MERGE means that return values in the specified context will be stored
-in the same structure that is used for the other context. For
-example, suppose you have a function which always returns a scalar,
-and doesn't care whether it was called in scalar or list context. You
-don't want to store its list-context reutrn separately from its
-scalar-context return, because they're going to be the same anyway,
-and if you stored them separately, you'd waste a call and a cache
-slot. So you say LIST_CONTEXT => MERGE, and then list context is
-considered the same as scalar context.
-
-You can also use MERGE with a normalizer to get the list-context and
-scalar-context returns stored in the same database without conflicting
-with each other.
+What for?
-If you specify MERGE for both, it's either an error or else you get
-them stored in one in-memory hash, or something.
+Maybe the tied hash interface takes care of this anyway?
=item *
@@ -146,25 +106,19 @@ Include an example that caches DNS lookups.
Make tie for Storable (Memoize::Storable)
-=item*
+A prototype of Memoize::Storable is finished. Test it and add to the
+test suite.
-Make tie for DBI (Memoize::DBI)
+Done.
=item *
-Tie for SDBM doesn't work. Can't subclass SDBM? Why not?
+Make tie for DBI (Memoize::DBI)
=item *
I think there's a bug. See `###BUG'.
-=item *
-
-Docs / code inconsistent about SCALAR_CONTEXT vs SCALAR_CACHE.
-Make up your mind.
-
-Decision: SCALAR_CACHE.
-
=item *
Storable probably can't be done, because it doesn't allow updating.
@@ -187,11 +141,29 @@ writable in-memory cache? A generic tied hash maybe?
Maybe `save' and `restore' methods?
+It isn't working right because the destructor doesn't get called at
+the right time.
+
+This is fixed. `use strict vars' would have caught it immediately. Duh.
+
=item *
+Don't forget about generic interface to Storable-like packages
+
+=item *
+
+
Maybe add in TODISK after all, with TODISK => 'filename' equivalent to
SCALAR_CACHE => [TIE, Memoize::SDBM_File, $filename, O_RDWR|O_CREAT, 0666],
LIST_CACHE => MERGE
+=item *
+
+Maybe the default for LIST_CACHE should be MERGE anyway.
+
+=item *
+
+There was probably some other stuff that I forgot.
+
=back
View
6 t/speed.t
@@ -3,8 +3,14 @@
use lib '..';
use Memoize;
+if (-e '.fast') {
+ print "1..0\n";
+ exit 0;
+}
+
print STDERR "\nWarning: I'm testing the speedup. This might take up to sixty seconds.\n ";
+
print "1..6\n";
sub fib {
View
69 t/tie_storable.t
@@ -0,0 +1,69 @@
+#!/usr/bin/perl
+# -*- mode: perl; perl-indent-level: 2 -*-
+
+use lib qw(. ..);
+use Memoize 0.45 qw(memoize unmemoize);
+
+# Identity function: returns its first argument.
+sub i {
+  $_[0];
+}
+
+# Constant functions; each returns the number in its name.
+sub c119 { 119 }
+sub c7 { 7 }
+sub c43 { 43 }
+sub c23 { 23 }
+sub c5 { 5 }
+
+# Successor function: returns its first argument plus one.
+sub n {
+  $_[0]+1;
+}
+
+# Skip the whole test when the glue module cannot be loaded.
+# Memoize::Storable itself does `use Storable ()', so a missing Storable
+# shows up here as a load failure.  The module must be loaded at run
+# time inside the eval: a compile-time `use Memoize::Storable' would
+# abort the script before this guard was ever reached.
+eval {require Memoize::Storable};
+if ($@) {
+  print "1..0\n";
+  exit 0;
+}
+$Memoize::Storable::Verbose = 0;
+
+print "1..4\n";
+
+$file = '/tmp/storable.db';
+unlink $file;
+tryout('Memoize::Storable', $file, 1); # Test 1..4
+unlink $file;
+
+# Run four checks against a cache tied to package $tiepack and backed by
+# $file, printing "ok N" / "not ok N" lines numbered from $testno.
+sub tryout {
+  my ($tiepack, $file, $testno) = @_;
+
+  # Pass one: memoize c5 with its scalar cache tied to the file.  List
+  # context is declared a fault, so c5 is only ever called in scalar
+  # context here.
+  memoize 'c5',
+    SCALAR_CACHE => ['TIE', $tiepack, $file],
+    LIST_CACHE => 'FAULT'
+    ;
+
+  my $first = c5();
+  my $second = c5();
+  for my $got ($first, $second) {
+    print(($got == 5 ? "ok" : "not ok") . " $testno\n");
+    $testno++;
+  }
+  unmemoize 'c5';
+
+  # Pass two, the tricky part: memoize c23 against the same file, which
+  # is the wrong table for it and already holds the cached 5.  The
+  # checks therefore expect 5 rather than 23.
+  memoize 'c23',
+    SCALAR_CACHE => ['TIE', $tiepack, $file],
+    LIST_CACHE => 'FAULT'
+    ;
+
+  my $third = c23();
+  my $fourth = c23();
+  for my $got ($third, $fourth) {
+    print(($got == 5 ? "ok" : "not ok") . " $testno\n");
+    $testno++;
+  }
+  unmemoize 'c23';
+}
+

0 comments on commit 84a67d2

Please sign in to comment.