Permalink
Browse files

RE: Biannual Competition to Improve Hashing Function

       Date: Thu, 8 Feb 2001 10:44:00 -0500
       Message-Id: <A5E22933E3D5D4118FFE00508BF373C706A52F@indyexch28.indy.tce.
       Date: Thu, 8 Feb 2001 15:02:47 -0500
       Message-Id: <A5E22933E3D5D4118FFE00508BF373C706A52B@indyexch28.indy.tce.

p4raw-id: //depot/perl@8750
  • Loading branch information...
Mark Fisher Nick Ing-Simmons
Mark Fisher authored and Nick Ing-Simmons committed Feb 8, 2001
1 parent 682a4d6 commit a6fe520e69b81614f687efd2cdbfdcd1bb40e201
Showing with 78 additions and 52 deletions.
  1. +11 −3 hv.h
  2. +18 −0 pod/perldelta.pod
  3. +48 −48 t/lib/dumper.t
  4. +1 −1 t/pragma/warn/perl
View
14 hv.h
@@ -43,14 +43,22 @@ struct xpvhv {
};
/* hash a key */
+/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins */
+/* from requirements by Colin Plumb. */
+/* (http://burtleburtle.net/bob/hash/doobs.html) */
#define PERL_HASH(hash,str,len) \
STMT_START { \
register const char *s_PeRlHaSh = str; \
register I32 i_PeRlHaSh = len; \
register U32 hash_PeRlHaSh = 0; \
- while (i_PeRlHaSh--) \
- hash_PeRlHaSh = hash_PeRlHaSh * 33 + *s_PeRlHaSh++; \
- (hash) = hash_PeRlHaSh + (hash_PeRlHaSh>>5); \
+ while (i_PeRlHaSh--) { \
+ hash_PeRlHaSh += *s_PeRlHaSh++; \
+ hash_PeRlHaSh += (hash_PeRlHaSh << 10); \
+ hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \
+ } \
+ hash_PeRlHaSh += (hash_PeRlHaSh << 3); \
+ hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \
+ (hash) = (hash_PeRlHaSh += (hash_PeRlHaSh << 15)); \
} STMT_END
/*
View
@@ -101,6 +101,13 @@ The tr///C and tr///U features have been removed and will not return;
the interface was a mistake. Sorry about that. For similar
functionality, see pack('U0', ...) and pack('C0', ...).
+=item *
+
+Although "you shouldn't do that", it was possible to write code that
+depends on Perl's hashed key order (Data::Dumper does this). The new
+algorithm "One-at-a-Time" produces a different hashed key order.
+More details are in L<perldelta/Performance Enhancements>.
+
=back
=head1 Core Enhancements
@@ -324,6 +331,17 @@ as opposed to quicksort's Theta(N**2) worst-case run time behaviour),
and that sort() is now stable (meaning that elements with identical
keys will stay ordered as they were before the sort).
+=item *
+
+Hashes now use Bob Jenkins "One-at-a-Time" hashing key algorithm
+(http://burtleburtle.net/bob/hash/doobs.html).
+This algorithm is reasonably fast while producing a much better spread
+of values. Hash values output from the algorithm on a hash of all
+3-char printable ASCII keys comes much closer to passing the DIEHARD
+random number generation tests. According to perlbench, this change
+has not affected the overall speed of Perl.
+
+
=back
=head1 Installation and Configuration Improvements
View
@@ -87,11 +87,11 @@ $WANT = <<'EOT';
#$a = [
# 1,
# {
-# 'a' => $a,
-# 'b' => $a->[1],
# 'c' => [
# 'c'
-# ]
+# ],
+# 'a' => $a,
+# 'b' => $a->[1]
# },
# $a->[1]{'c'}
# ];
@@ -109,11 +109,11 @@ $WANT = <<'EOT';
#@a = (
# 1,
# {
-# 'a' => [],
-# 'b' => {},
# 'c' => [
# 'c'
-# ]
+# ],
+# 'a' => [],
+# 'b' => {}
# },
# []
# );
@@ -131,19 +131,19 @@ TEST q(Data::Dumper->Dumpxs([$a, $b], [qw(*a b)])) if $XS;
##
$WANT = <<'EOT';
#%b = (
+# 'c' => [
+# 'c'
+# ],
# 'a' => [
# 1,
# {},
-# [
-# 'c'
-# ]
+# []
# ],
-# 'b' => {},
-# 'c' => []
+# 'b' => {}
# );
#$b{'a'}[1] = \%b;
+#$b{'a'}[2] = $b{'c'};
#$b{'b'} = \%b;
-#$b{'c'} = $b{'a'}[2];
#$a = $b{'a'};
EOT
@@ -156,15 +156,15 @@ $WANT = <<'EOT';
#$a = [
# 1,
# {
+# 'c' => [],
# 'a' => [],
-# 'b' => {},
-# 'c' => []
+# 'b' => {}
# },
# []
#];
+#$a->[1]{'c'} = \@c;
#$a->[1]{'a'} = $a;
#$a->[1]{'b'} = $a->[1];
-#$a->[1]{'c'} = \@c;
#$a->[2] = \@c;
#$b = $a->[1];
EOT
@@ -192,12 +192,12 @@ $WANT = <<'EOT';
# 1,
# #1
# {
-# a => $a,
-# b => $a->[1],
# c => [
# #0
# 'c'
-# ]
+# ],
+# a => $a,
+# b => $a->[1]
# },
# #2
# $a->[1]{c}
@@ -217,11 +217,11 @@ $WANT = <<'EOT';
#$VAR1 = [
# 1,
# {
-# 'a' => [],
-# 'b' => {},
# 'c' => [
# 'c'
-# ]
+# ],
+# 'a' => [],
+# 'b' => {}
# },
# []
#];
@@ -239,11 +239,11 @@ $WANT = <<'EOT';
#[
# 1,
# {
-# a => $VAR1,
-# b => $VAR1->[1],
# c => [
# 'c'
-# ]
+# ],
+# a => $VAR1,
+# b => $VAR1->[1]
# },
# $VAR1->[1]{c}
#]
@@ -262,8 +262,8 @@ EOT
##
$WANT = <<'EOT';
#$VAR1 = {
-# "abc\0'\efg" => "mno\0",
-# "reftest" => \\1
+# "reftest" => \\1,
+# "abc\0'\efg" => "mno\0"
#};
EOT
@@ -277,8 +277,8 @@ $foo = { "abc\000\'\efg" => "mno\000",
$WANT = <<"EOT";
#\$VAR1 = {
-# 'abc\0\\'\efg' => 'mno\0',
-# 'reftest' => \\\\1
+# 'reftest' => \\\\1,
+# 'abc\0\\'\efg' => 'mno\0'
#};
EOT
@@ -313,15 +313,15 @@ EOT
# do{my $o},
# #2
# {
+# 'c' => [],
# 'a' => 1,
# 'b' => do{my $o},
-# 'c' => [],
# 'd' => {}
# }
# ];
#*::foo{ARRAY}->[1] = $foo;
-#*::foo{ARRAY}->[2]{'b'} = *::foo{SCALAR};
#*::foo{ARRAY}->[2]{'c'} = *::foo{ARRAY};
+#*::foo{ARRAY}->[2]{'b'} = *::foo{SCALAR};
#*::foo{ARRAY}->[2]{'d'} = *::foo{ARRAY}->[2];
#*::foo = *::foo{ARRAY}->[2];
#@bar = @{*::foo{ARRAY}};
@@ -342,15 +342,15 @@ EOT
# -10,
# do{my $o},
# {
+# 'c' => [],
# 'a' => 1,
# 'b' => do{my $o},
-# 'c' => [],
# 'd' => {}
# }
#];
#*::foo{ARRAY}->[1] = $foo;
-#*::foo{ARRAY}->[2]{'b'} = *::foo{SCALAR};
#*::foo{ARRAY}->[2]{'c'} = *::foo{ARRAY};
+#*::foo{ARRAY}->[2]{'b'} = *::foo{SCALAR};
#*::foo{ARRAY}->[2]{'d'} = *::foo{ARRAY}->[2];
#*::foo = *::foo{ARRAY}->[2];
#$bar = *::foo{ARRAY};
@@ -372,13 +372,13 @@ EOT
#*::foo = \5;
#*::foo = \@bar;
#*::foo = {
+# 'c' => [],
# 'a' => 1,
# 'b' => do{my $o},
-# 'c' => [],
# 'd' => {}
#};
-#*::foo{HASH}->{'b'} = *::foo{SCALAR};
#*::foo{HASH}->{'c'} = \@bar;
+#*::foo{HASH}->{'b'} = *::foo{SCALAR};
#*::foo{HASH}->{'d'} = *::foo{HASH};
#$bar[2] = *::foo{HASH};
#%baz = %{*::foo{HASH}};
@@ -399,13 +399,13 @@ EOT
#*::foo = \5;
#*::foo = $bar;
#*::foo = {
+# 'c' => [],
# 'a' => 1,
# 'b' => do{my $o},
-# 'c' => [],
# 'd' => {}
#};
-#*::foo{HASH}->{'b'} = *::foo{SCALAR};
#*::foo{HASH}->{'c'} = $bar;
+#*::foo{HASH}->{'b'} = *::foo{SCALAR};
#*::foo{HASH}->{'d'} = *::foo{HASH};
#$bar->[2] = *::foo{HASH};
#$baz = *::foo{HASH};
@@ -423,9 +423,9 @@ EOT
# -10,
# $foo,
# {
+# c => \@bar,
# a => 1,
# b => \5,
-# c => \@bar,
# d => $bar[2]
# }
#);
@@ -445,9 +445,9 @@ EOT
# -10,
# $foo,
# {
+# c => $bar,
# a => 1,
# b => \5,
-# c => $bar,
# d => $bar->[2]
# }
#];
@@ -476,8 +476,8 @@ EOT
##
$WANT = <<'EOT';
#%kennels = (
-# First => \'Fido',
-# Second => \'Wags'
+# Second => \'Wags',
+# First => \'Fido'
#);
#@dogs = (
# ${$kennels{First}},
@@ -515,8 +515,8 @@ EOT
##
$WANT = <<'EOT';
#%kennels = (
-# First => \'Fido',
-# Second => \'Wags'
+# Second => \'Wags',
+# First => \'Fido'
#);
#@dogs = (
# ${$kennels{First}},
@@ -539,8 +539,8 @@ EOT
# 'Fido',
# 'Wags',
# {
-# First => \$dogs[0],
-# Second => \$dogs[1]
+# Second => \$dogs[1],
+# First => \$dogs[0]
# }
#);
#%kennels = %{$dogs[2]};
@@ -574,13 +574,13 @@ EOT
# 'Fido',
# 'Wags',
# {
-# First => \'Fido',
-# Second => \'Wags'
+# Second => \'Wags',
+# First => \'Fido'
# }
#);
#%kennels = (
-# First => \'Fido',
-# Second => \'Wags'
+# Second => \'Wags',
+# First => \'Fido'
#);
EOT
View
@@ -46,8 +46,8 @@ $x = 3 ;
use warnings 'once' ;
$z = 3 ;
EXPECT
-Name "main::x" used only once: possible typo at - line 4.
Name "main::z" used only once: possible typo at - line 6.
+Name "main::x" used only once: possible typo at - line 4.
########
-X
# perl.c

0 comments on commit a6fe520

Please sign in to comment.