Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add version of xxHash64 that uses native BigInt #24

Open
wants to merge 1 commit into
base: master
from

Conversation

@Daniel15
Copy link
Contributor

commented Jul 28, 2019

Adds a new file, xxhash64-bigint.js, that uses native JavaScript BigInts rather than the cuint library. I haven't added it to your build scripts or anything like that yet, as I'm not quite sure how it should be packaged. Probably the best thing is to have a separate npm package (eg. xxhashjs-bigint) that doesn't depend on the cunit package at all. It could also be added to the existing package, but that means the cunit package would still be pulled in even when not needed.

I've been testing with scripts similar to this:

debugger;
const New64 = require('./lib/xxhash64-bigint');
const Old64 = require('./lib/xxhash64');

const input = 'hello world AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA';

console.log('NEW: ', New64(input, 0));
console.log('OLD: ', Old64(input, 0).toString());

Which results in output like:

NEW:  769387546530018441n
OLD:  769387546530018441

Since it's a new file, the easiest way to view the changes is probably to manually compare the original xxhash64.js (after the patch in #23) with the new file. A diff is below:

--- C:/src/js-xxhash/lib/xxhash64-orig.js	Sat Jul 27 17:20:28 2019
+++ C:/src/js-xxhash/lib/xxhash64-bigint.js	Sat Jul 27 17:21:21 2019
@@ -2,18 +2,20 @@
 xxHash64 implementation in pure Javascript
 
 Copyright (C) 2016, Pierre Curto
+Copyright (C) 2019, Daniel Lo Nigro <d.sb>
 MIT license
 */
-var UINT64 = require('cuint').UINT64
 
 /*
  * Constants
  */
-var PRIME64_1 = UINT64( '11400714785074694791' )
-var PRIME64_2 = UINT64( '14029467366897019727' )
-var PRIME64_3 = UINT64(  '1609587929392839161' )
-var PRIME64_4 = UINT64(  '9650029242287828579' )
-var PRIME64_5 = UINT64(  '2870177450012600261' )
+var PRIME64_1 = BigInt( '11400714785074694791' )
+var PRIME64_2 = BigInt( '14029467366897019727' )
+var PRIME64_3 = BigInt(  '1609587929392839161' )
+var PRIME64_4 = BigInt(  '9650029242287828579' )
+var PRIME64_5 = BigInt(  '2870177450012600261' )
+var BITS = 64n
+var BITMASK = 2n ** BITS - 1n
 
 /**
 * Convert string to proper UTF-8 array
@@ -53,6 +55,64 @@
 }
 
 /**
+ * Converts bits to a BigInt.
+ * @param {Number} first low bits (8)
+ * @param {Number} second low bits (8)
+ * @param {Number} first high bits (8)
+ * @param {Number} second high bits (8)
+ */
+function bitsToBigInt(a00, a16, a32, a48) {
+	return (
+		BigInt(a00) | 
+		(BigInt(a16) << 16n) |
+		(BigInt(a32) << 32n) |
+		(BigInt(a48) << 48n)
+	)
+}
+
+/**
+ * Converts a chunk of memory (either an ArrayBuffer or a Node.js Buffer)
+ * to a BigInt representing a 64-bit integer.
+ * @param {ArrayBuffer|Buffer} The buffer
+ * @param {number} offset
+ * @return BigInt
+ */
+function memoryToBigInt(memory, offset) {
+	return (
+		BigInt(memory[offset]) | 
+		(BigInt(memory[offset+1]) << 8n) |
+		(BigInt(memory[offset+2]) << 16n) |
+		(BigInt(memory[offset+3]) << 24n) |
+		(BigInt(memory[offset+4]) << 32n) |
+		(BigInt(memory[offset+5]) << 40n) |
+		(BigInt(memory[offset+6]) << 48n) | 
+		(BigInt(memory[offset+7]) << 56n)
+	)
+}
+
+/**
+ * Performs a left bitwise rotation on the given unsigned 64-bit integer.
+ * @param {BigInt} number to rotate
+ * @param {BigInt} number of bits to rotate by
+ * @return BigInt
+ */
+function rotateLeft(value, rotation) {
+	return (
+		((value << rotation) & BITMASK) |
+		(value >> (BITS - rotation))
+	);
+}
+
+/**
+ * Truncate a BigInt to a 64-bit unsigned integer.
+ * @param {BigInt} number to truncate
+ * @return BigInt
+ */
+function truncate(value) {
+	return BigInt.asUintN(64, value);
+}
+
+/**
  * XXH64 object used as a constructor or a function
  * @constructor
  * or
@@ -79,11 +139,11 @@
  * @return ThisExpression
  */
  function init (seed) {
-	this.seed = seed instanceof UINT64 ? seed.clone() : UINT64(seed)
-	this.v1 = this.seed.clone().add(PRIME64_1).add(PRIME64_2)
-	this.v2 = this.seed.clone().add(PRIME64_2)
-	this.v3 = this.seed.clone()
-	this.v4 = this.seed.clone().subtract(PRIME64_1)
+	this.seed = BigInt.asUintN(32, BigInt(seed))
+	this.v1 = truncate(this.seed + PRIME64_1 + PRIME64_2)
+	this.v2 = truncate(this.seed + PRIME64_2)
+	this.v3 = this.seed
+	this.v4 = truncate(this.seed - PRIME64_1)
 	this.total_len = 0
 	this.memsize = 0
 	this.memory = null
@@ -154,37 +214,17 @@
 
 		var p64 = 0
 		var other
-		other = UINT64(
-				(this.memory[p64+1] << 8) | this.memory[p64]
-			,	(this.memory[p64+3] << 8) | this.memory[p64+2]
-			,	(this.memory[p64+5] << 8) | this.memory[p64+4]
-			,	(this.memory[p64+7] << 8) | this.memory[p64+6]
-			)
-		this.v1.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+		other = memoryToBigInt(this.memory, p64)
+		this.v1 = truncate(rotateLeft(truncate(this.v1 + other * PRIME64_2), 31n) * PRIME64_1);
 		p64 += 8
-		other = UINT64(
-				(this.memory[p64+1] << 8) | this.memory[p64]
-			,	(this.memory[p64+3] << 8) | this.memory[p64+2]
-			,	(this.memory[p64+5] << 8) | this.memory[p64+4]
-			,	(this.memory[p64+7] << 8) | this.memory[p64+6]
-			)
-		this.v2.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+		other = memoryToBigInt(this.memory, p64)
+		this.v2 = truncate(rotateLeft(truncate(this.v2 + other * PRIME64_2), 31n) * PRIME64_1);
 		p64 += 8
-		other = UINT64(
-				(this.memory[p64+1] << 8) | this.memory[p64]
-			,	(this.memory[p64+3] << 8) | this.memory[p64+2]
-			,	(this.memory[p64+5] << 8) | this.memory[p64+4]
-			,	(this.memory[p64+7] << 8) | this.memory[p64+6]
-			)
-		this.v3.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+		other = memoryToBigInt(this.memory, p64)
+		this.v3 = truncate(rotateLeft(truncate(this.v3 + other * PRIME64_2), 31n) * PRIME64_1);
 		p64 += 8
-		other = UINT64(
-				(this.memory[p64+1] << 8) | this.memory[p64]
-			,	(this.memory[p64+3] << 8) | this.memory[p64+2]
-			,	(this.memory[p64+5] << 8) | this.memory[p64+4]
-			,	(this.memory[p64+7] << 8) | this.memory[p64+6]
-			)
-		this.v4.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+		other = memoryToBigInt(this.memory, p64)
+		this.v4 = truncate(rotateLeft(truncate(this.v4 + other * PRIME64_2), 31n) * PRIME64_1);
 
 		p += 32 - this.memsize
 		this.memsize = 0
@@ -197,37 +237,17 @@
 		do
 		{
 			var other
-			other = UINT64(
-					(input[p+1] << 8) | input[p]
-				,	(input[p+3] << 8) | input[p+2]
-				,	(input[p+5] << 8) | input[p+4]
-				,	(input[p+7] << 8) | input[p+6]
-				)
-			this.v1.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+			other = memoryToBigInt(input, p)
+			this.v1 = truncate(rotateLeft(truncate(this.v1 + other * PRIME64_2), 31n) * PRIME64_1);
 			p += 8
-			other = UINT64(
-					(input[p+1] << 8) | input[p]
-				,	(input[p+3] << 8) | input[p+2]
-				,	(input[p+5] << 8) | input[p+4]
-				,	(input[p+7] << 8) | input[p+6]
-				)
-			this.v2.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+			other = memoryToBigInt(input, p)
+			this.v2 = truncate(rotateLeft(truncate(this.v2 + other * PRIME64_2), 31n) * PRIME64_1);
 			p += 8
-			other = UINT64(
-					(input[p+1] << 8) | input[p]
-				,	(input[p+3] << 8) | input[p+2]
-				,	(input[p+5] << 8) | input[p+4]
-				,	(input[p+7] << 8) | input[p+6]
-				)
-			this.v3.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+			other = memoryToBigInt(input, p)
+			this.v3 = truncate(rotateLeft(truncate(this.v3 + other * PRIME64_2), 31n) * PRIME64_1);
 			p += 8
-			other = UINT64(
-					(input[p+1] << 8) | input[p]
-				,	(input[p+3] << 8) | input[p+2]
-				,	(input[p+5] << 8) | input[p+4]
-				,	(input[p+7] << 8) | input[p+6]
-				)
-			this.v4.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+			other = memoryToBigInt(input, p)
+			this.v4 = truncate(rotateLeft(truncate(this.v4 + other * PRIME64_2), 31n) * PRIME64_1);
 			p += 8
 		} while (p <= limit)
 	}
@@ -257,83 +277,68 @@
 	var p = 0
 	var bEnd = this.memsize
 	var h64, h
-	var u = new UINT64
+	var u
 
 	if (this.total_len >= 32)
 	{
-		h64 = this.v1.clone().rotl(1)
-		h64.add( this.v2.clone().rotl(7) )
-		h64.add( this.v3.clone().rotl(12) )
-		h64.add( this.v4.clone().rotl(18) )
-
-		h64.xor( this.v1.multiply(PRIME64_2).rotl(31).multiply(PRIME64_1) )
-		h64.multiply(PRIME64_1).add(PRIME64_4)
+		h64 = rotateLeft(this.v1, 1n) + 
+			rotateLeft(this.v2, 7n) + 
+			rotateLeft(this.v3, 12n) + 
+			rotateLeft(this.v4, 18n)
+
+		h64 = truncate(h64 ^ (rotateLeft(truncate(this.v1 * PRIME64_2), 31n) * PRIME64_1))
+		h64 = truncate(h64 * PRIME64_1 + PRIME64_4)
 
-		h64.xor( this.v2.multiply(PRIME64_2).rotl(31).multiply(PRIME64_1) )
-		h64.multiply(PRIME64_1).add(PRIME64_4)
+		h64 = truncate(h64 ^ (rotateLeft(truncate(this.v2 * PRIME64_2), 31n) * PRIME64_1))
+		h64 = truncate(h64 * PRIME64_1 + PRIME64_4)
 
-		h64.xor( this.v3.multiply(PRIME64_2).rotl(31).multiply(PRIME64_1) )
-		h64.multiply(PRIME64_1).add(PRIME64_4)
+		h64 = truncate(h64 ^ (rotateLeft(truncate(this.v3 * PRIME64_2), 31n) * PRIME64_1))
+		h64 = truncate(h64 * PRIME64_1 + PRIME64_4)
 
-		h64.xor( this.v4.multiply(PRIME64_2).rotl(31).multiply(PRIME64_1) )
-		h64.multiply(PRIME64_1).add(PRIME64_4)
+		h64 = truncate(h64 ^ (rotateLeft(truncate(this.v4 * PRIME64_2), 31n) * PRIME64_1))
+		h64 = truncate(h64 * PRIME64_1 + PRIME64_4)
 	}
 	else
 	{
-		h64  = this.seed.clone().add( PRIME64_5 )
+		h64  = truncate(this.seed + PRIME64_5)
 	}
 
-	h64.add( u.fromNumber(this.total_len) )
+	h64 += BigInt(this.total_len)
 
 	while (p <= bEnd - 8)
 	{
-		u.fromBits(
-			(input[p+1] << 8) | input[p]
-		,	(input[p+3] << 8) | input[p+2]
-		,	(input[p+5] << 8) | input[p+4]
-		,	(input[p+7] << 8) | input[p+6]
-			)
-		u.multiply(PRIME64_2).rotl(31).multiply(PRIME64_1)
-		h64
-			.xor(u)
-			.rotl(27)
-			.multiply( PRIME64_1 )
-			.add( PRIME64_4 )
+		u = memoryToBigInt(input, p)
+		u = truncate(rotateLeft(truncate(u * PRIME64_2), 31n) * PRIME64_1)
+		
+		h64 = truncate((rotateLeft(h64 ^ u, 27n) * PRIME64_1) + PRIME64_4)
 		p += 8
 	}
 
 	if (p + 4 <= bEnd) {
-		u.fromBits(
+		u = bitsToBigInt(
 			(input[p+1] << 8) | input[p]
 		,	(input[p+3] << 8) | input[p+2]
 		,	0
 		,	0
 		)
-		h64
-			.xor( u.multiply(PRIME64_1) )
-			.rotl(23)
-			.multiply( PRIME64_2 )
-			.add( PRIME64_3 )
+		h64 = truncate((rotateLeft(h64 ^ truncate((u * PRIME64_1)), 23n) * PRIME64_2) + PRIME64_3)
 		p += 4
 	}
 
 	while (p < bEnd)
 	{
-		u.fromBits( input[p++], 0, 0, 0 )
-		h64
-			.xor( u.multiply(PRIME64_5) )
-			.rotl(11)
-			.multiply(PRIME64_1)
+		u = bitsToBigInt( input[p++], 0, 0, 0 )
+		h64 = truncate(rotateLeft(h64 ^ truncate(u * PRIME64_5), 11n) * PRIME64_1)
 	}
 
-	h = h64.clone().shiftRight(33)
-	h64.xor(h).multiply(PRIME64_2)
+	h = truncate(h64 >> 33n)
+	h64 = truncate((h64 ^ h) * PRIME64_2)
 
-	h = h64.clone().shiftRight(29)
-	h64.xor(h).multiply(PRIME64_3)
+	h = truncate(h64 >> 29n)
+	h64 = truncate((h64 ^ h) * PRIME64_3)
 
-	h = h64.clone().shiftRight(32)
-	h64.xor(h)
+	h = truncate(h64 >> 32n)
+	h64 = truncate(h64 ^ h)
 
 	// Reset the state
 	this.init( this.seed )

Closes #22

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
1 participant
You can’t perform that action at this time.