Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stats: change FMSketch hash function from fnv to murmur #4541

Merged
merged 2 commits into from Sep 15, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 24 additions & 0 deletions _vendor/src/github.com/spaolacci/murmur3/LICENSE
@@ -0,0 +1,24 @@
Copyright 2013, Sébastien Paolacci.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the library nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65 changes: 65 additions & 0 deletions _vendor/src/github.com/spaolacci/murmur3/murmur.go
@@ -0,0 +1,65 @@
// Copyright 2013, Sébastien Paolacci. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

/*
Native (and fast) implementation of Austin Appleby's MurmurHash3.

Package murmur3 implements Austin Appleby's non-cryptographic MurmurHash3.

Reference implementation:
http://code.google.com/p/smhasher/wiki/MurmurHash3

History, characteristics and (legacy) perfs:
https://sites.google.com/site/murmurhash/
https://sites.google.com/site/murmurhash/statistics
*/
package murmur3

type bmixer interface {
bmix(p []byte) (tail []byte)
Size() (n int)
reset()
}

type digest struct {
clen int // Digested input cumulative length.
tail []byte // 0 to Size()-1 bytes view of `buf'.
buf [16]byte // Expected (but not required) to be Size() large.
bmixer
}

func (d *digest) BlockSize() int { return 1 }

func (d *digest) Write(p []byte) (n int, err error) {
n = len(p)
d.clen += n

if len(d.tail) > 0 {
// Stick back pending bytes.
nfree := d.Size() - len(d.tail) // nfree ∈ [1, d.Size()-1].
if nfree < len(p) {
// One full block can be formed.
block := append(d.tail, p[:nfree]...)
p = p[nfree:]
_ = d.bmix(block) // No tail.
} else {
// Tail's buf is large enough to prevent reallocs.
p = append(d.tail, p...)
}
}

d.tail = d.bmix(p)

// Keep own copy of the 0 to Size()-1 pending bytes.
nn := copy(d.buf[:], d.tail)
d.tail = d.buf[:nn]

return n, nil
}

func (d *digest) Reset() {
d.clen = 0
d.tail = nil
d.bmixer.reset()
}
189 changes: 189 additions & 0 deletions _vendor/src/github.com/spaolacci/murmur3/murmur128.go
@@ -0,0 +1,189 @@
package murmur3

import (
//"encoding/binary"
"hash"
"unsafe"
)

const (
c1_128 = 0x87c37b91114253d5
c2_128 = 0x4cf5ad432745937f
)

// Make sure interfaces are correctly implemented.
var (
_ hash.Hash = new(digest128)
_ Hash128 = new(digest128)
_ bmixer = new(digest128)
)

// Hack: the standard api doesn't define any Hash128 interface.
type Hash128 interface {
hash.Hash
Sum128() (uint64, uint64)
}

// digest128 represents a partial evaluation of a 128 bites hash.
type digest128 struct {
digest
h1 uint64 // Unfinalized running hash part 1.
h2 uint64 // Unfinalized running hash part 2.
}

func New128() Hash128 {
d := new(digest128)
d.bmixer = d
d.Reset()
return d
}

func (d *digest128) Size() int { return 16 }

func (d *digest128) reset() { d.h1, d.h2 = 0, 0 }

func (d *digest128) Sum(b []byte) []byte {
h1, h2 := d.Sum128()
return append(b,
byte(h1>>56), byte(h1>>48), byte(h1>>40), byte(h1>>32),
byte(h1>>24), byte(h1>>16), byte(h1>>8), byte(h1),

byte(h2>>56), byte(h2>>48), byte(h2>>40), byte(h2>>32),
byte(h2>>24), byte(h2>>16), byte(h2>>8), byte(h2),
)
}

func (d *digest128) bmix(p []byte) (tail []byte) {
h1, h2 := d.h1, d.h2

nblocks := len(p) / 16
for i := 0; i < nblocks; i++ {
t := (*[2]uint64)(unsafe.Pointer(&p[i*16]))
k1, k2 := t[0], t[1]

k1 *= c1_128
k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
k1 *= c2_128
h1 ^= k1

h1 = (h1 << 27) | (h1 >> 37) // rotl64(h1, 27)
h1 += h2
h1 = h1*5 + 0x52dce729

k2 *= c2_128
k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
k2 *= c1_128
h2 ^= k2

h2 = (h2 << 31) | (h2 >> 33) // rotl64(h2, 31)
h2 += h1
h2 = h2*5 + 0x38495ab5
}
d.h1, d.h2 = h1, h2
return p[nblocks*d.Size():]
}

func (d *digest128) Sum128() (h1, h2 uint64) {

h1, h2 = d.h1, d.h2

var k1, k2 uint64
switch len(d.tail) & 15 {
case 15:
k2 ^= uint64(d.tail[14]) << 48
fallthrough
case 14:
k2 ^= uint64(d.tail[13]) << 40
fallthrough
case 13:
k2 ^= uint64(d.tail[12]) << 32
fallthrough
case 12:
k2 ^= uint64(d.tail[11]) << 24
fallthrough
case 11:
k2 ^= uint64(d.tail[10]) << 16
fallthrough
case 10:
k2 ^= uint64(d.tail[9]) << 8
fallthrough
case 9:
k2 ^= uint64(d.tail[8]) << 0

k2 *= c2_128
k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
k2 *= c1_128
h2 ^= k2

fallthrough

case 8:
k1 ^= uint64(d.tail[7]) << 56
fallthrough
case 7:
k1 ^= uint64(d.tail[6]) << 48
fallthrough
case 6:
k1 ^= uint64(d.tail[5]) << 40
fallthrough
case 5:
k1 ^= uint64(d.tail[4]) << 32
fallthrough
case 4:
k1 ^= uint64(d.tail[3]) << 24
fallthrough
case 3:
k1 ^= uint64(d.tail[2]) << 16
fallthrough
case 2:
k1 ^= uint64(d.tail[1]) << 8
fallthrough
case 1:
k1 ^= uint64(d.tail[0]) << 0
k1 *= c1_128
k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
k1 *= c2_128
h1 ^= k1
}

h1 ^= uint64(d.clen)
h2 ^= uint64(d.clen)

h1 += h2
h2 += h1

h1 = fmix64(h1)
h2 = fmix64(h2)

h1 += h2
h2 += h1

return h1, h2
}

func fmix64(k uint64) uint64 {
k ^= k >> 33
k *= 0xff51afd7ed558ccd
k ^= k >> 33
k *= 0xc4ceb9fe1a85ec53
k ^= k >> 33
return k
}

/*
func rotl64(x uint64, r byte) uint64 {
return (x << r) | (x >> (64 - r))
}
*/

// Sum128 returns the MurmurHash3 sum of data. It is equivalent to the
// following sequence (without the extra burden and the extra allocation):
// hasher := New128()
// hasher.Write(data)
// return hasher.Sum128()
func Sum128(data []byte) (h1 uint64, h2 uint64) {
d := &digest128{h1: 0, h2: 0}
d.tail = d.bmix(data)
d.clen = len(data)
return d.Sum128()
}