Skip to content

Commit

Permalink
Improve performance and allow Uint8Array inputs (#15)
Browse files Browse the repository at this point in the history
The input value can be a string or a Uint8Array. If it's a string, it will be encoded to a Uint8Array using the cached TextEncoder instance.

Added a new `utf8Buffer` option to allow the caller to provide a pre-allocated Uint8Array buffer to use for encoding. When the `utf8Buffer` option is provided, the input string is encoded into the buffer using `TextEncoder.encodeInto()`.

The new test cases use the same input strings and expected bigint values as the original test cases, so it's easy to see that the new implementation produces the same results.
  • Loading branch information
dbrockman committed Nov 16, 2023
1 parent ddbe4f2 commit edb1546
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 18 deletions.
21 changes: 20 additions & 1 deletion index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@ export interface Options {
@default 32
*/
readonly size?: 32 | 64 | 128 | 256 | 512 | 1024;

/**
A Uint8Array used to encode the string into UTF-8 bytes.
This array can be reused across calls to `fnv1a`. Doing so will improve performance because it avoids allocating a new Uint8Array when encoding the string.
The size of the array does not have to be large enough to hold the entire string, but performance will be improved if it is.
This option is only used when `value` is a string.
*/
readonly utf8Buffer?: Uint8Array;
}

/**
Expand All @@ -24,6 +35,14 @@ fnv1a('🦄🌈', {size: 128});
Number(fnv1a('🦄🌈', {size: 32}));
//=> 2868248295
const utf8Buffer = new Uint8Array(100);
fnv1a('🦄🌈', {size: 32, utf8Buffer});
//=> 2868248295n
const bytes = new Uint8Array([240, 159, 166, 132, 240, 159, 140, 136]);
fnv1a(bytes, {size: 32});
//=> 2868248295n
```
*/
export default function fnv1a(string: string, options?: Options): bigint;
export default function fnv1a(value: string | Uint8Array, options?: Options): bigint;
56 changes: 40 additions & 16 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,30 +19,54 @@ const FNV_OFFSETS = {
1024: 14_197_795_064_947_621_068_722_070_641_403_218_320_880_622_795_441_933_960_878_474_914_617_582_723_252_296_732_303_717_722_150_864_096_521_202_355_549_365_628_174_669_108_571_814_760_471_015_076_148_029_755_969_804_077_320_157_692_458_563_003_215_304_957_150_157_403_644_460_363_550_505_412_711_285_966_361_610_267_868_082_893_823_963_790_439_336_411_086_884_584_107_735_010_676_915n,
};

export default function fnv1a(string, {size = 32} = {}) {
if (!FNV_PRIMES[size]) {
throw new Error('The `size` option must be one of 32, 64, 128, 256, 512, or 1024');
}
// Single TextEncoder instance, created once at module load and shared by every string-hashing call in this module.
const cachedEncoder = new globalThis.TextEncoder();

function fnv1aUint8Array(uint8Array, size) {
const fnvPrime = FNV_PRIMES[size];
let hash = FNV_OFFSETS[size];

// eslint-disable-next-line unicorn/no-for-loop -- This is a performance-sensitive loop
for (let index = 0; index < uint8Array.length; index++) {
hash ^= BigInt(uint8Array[index]);
hash = BigInt.asUintN(size, hash * fnvPrime);
}

return hash;
}

function fnv1aEncodeInto(string, size, utf8Buffer) {
if (utf8Buffer.length === 0) {
throw new Error('The `utf8Buffer` option must have a length greater than zero');
}

const fnvPrime = FNV_PRIMES[size];
let hash = FNV_OFFSETS[size];
let remaining = string;

while (remaining.length > 0) {
const result = cachedEncoder.encodeInto(remaining, utf8Buffer);
remaining = remaining.slice(result.read);
for (let index = 0; index < result.written; index++) {
hash ^= BigInt(utf8Buffer[index]);
hash = BigInt.asUintN(size, hash * fnvPrime);
}
}

// Handle Unicode code points > 0x7f
let isUnicoded = false;
return hash;
}

for (let index = 0; index < string.length; index++) {
let characterCode = string.charCodeAt(index);
export default function fnv1a(value, {size = 32, utf8Buffer} = {}) {
if (!FNV_PRIMES[size]) {
throw new Error('The `size` option must be one of 32, 64, 128, 256, 512, or 1024');
}

// Non-ASCII characters trigger the Unicode escape logic
if (characterCode > 0x7F && !isUnicoded) {
string = unescape(encodeURIComponent(string));
characterCode = string.charCodeAt(index);
isUnicoded = true;
if (typeof value === 'string') {
if (utf8Buffer) {
return fnv1aEncodeInto(value, size, utf8Buffer);
}

hash ^= BigInt(characterCode);
hash = BigInt.asUintN(size, hash * fnvPrime);
value = cachedEncoder.encode(value);
}

return hash;
return fnv1aUint8Array(value, size);
}
26 changes: 25 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,28 @@ fnv1a('🦄🌈', {size: 128});

Number(fnv1a('🦄🌈', {size: 32}));
//=> 2868248295

const utf8Buffer = new Uint8Array(100);
fnv1a('🦄🌈', {size: 32, utf8Buffer});
//=> 2868248295n

const bytes = new Uint8Array([240, 159, 166, 132, 240, 159, 140, 136]);
fnv1a(bytes, {size: 32});
//=> 2868248295n
```

## API

### fnv1a(string, options?)
### fnv1a(value, options?)

Returns the hash as a positive `BigInt`.

If you need it as a `number`, use `32` as `size` and wrap the return value in `Number(…)`.

#### value

Type: `string | Uint8Array`

#### options

Type: `object`
Expand All @@ -47,6 +59,18 @@ Default: `32`

The bit size of the hash.

##### utf8Buffer

Type: `Uint8Array`

A Uint8Array used to encode the string into UTF-8 bytes.

This array can be reused across calls to `fnv1a`. Doing so will improve performance because it avoids allocating a new Uint8Array when encoding the string.

The size of the array does not have to be large enough to hold the entire string, but performance will be improved if it is.

This option is only used when `value` is a string.

## Related

- [djb2a](https://github.com/sindresorhus/djb2a) - DJB2a non-cryptographic hash function
Expand Down
33 changes: 33 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,36 @@ test('bigInt() - 1024-bit', t => {
t.is(fnv1a('', {size: 1024}), 14_197_795_064_947_621_068_722_070_641_403_218_320_880_622_795_441_933_960_878_474_914_617_582_723_252_296_732_303_717_722_150_864_096_521_202_355_549_365_628_174_669_108_571_814_760_471_015_076_148_029_755_969_804_077_320_157_692_458_563_003_215_304_957_150_157_403_644_460_363_550_505_412_711_285_966_361_610_267_868_082_893_823_963_790_439_336_411_086_884_584_107_735_010_676_915n);
t.is(fnv1a('hello', {size: 1024}), 162_599_568_807_828_018_278_740_454_090_851_618_076_261_791_243_547_429_330_845_926_617_440_124_701_815_376_483_262_958_546_407_611_470_083_720_486_420_160_817_850_263_303_428_987_405_974_668_389_046_941_240_548_898_833_919_126_704_680_456_253_506_816_487_407_186_600_714_845_619_389_901_326_326_498_663_678_676_823_405_702_541_932_736_634_507_371_229_190_999_806_123_793_839_783_784_715_844_873_833n);
});

// Exercises the `utf8Buffer` option: a single 64-byte buffer is passed to every call below.
test('reusing an Uint8Array for the string encoding', t => {
const utf8Buffer = new Uint8Array(64);

t.is(fnv1a('', {utf8Buffer}), 2_166_136_261n);
t.is(fnv1a('hello world', {utf8Buffer}), 3_582_672_807n);
t.is(fnv1a('hello', {size: 64, utf8Buffer}), 11_831_194_018_420_276_491n);
t.is(fnv1a('🦄🌈', {utf8Buffer}), 2_868_248_295n);
// Test that the string is read correctly even though it is too large to fit in utf8Buffer.
t.is(fnv1a('Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.', {utf8Buffer}), 2_964_896_417n);
});

// Passes raw bytes instead of a string: the input is the UTF-8 encoding of 'hello', hashed once per supported size.
test('hashing an Uint8Array', t => {
const bytes = new TextEncoder().encode('hello');

t.is(fnv1a(bytes, {size: 32}), 1_335_831_723n);
t.is(fnv1a(bytes, {size: 64}), 11_831_194_018_420_276_491n);
t.is(fnv1a(bytes, {size: 128}), 302_907_886_228_425_533_802_623_465_673_358_913_971n);
t.is(fnv1a(bytes, {size: 256}), 24_621_739_307_028_566_391_642_840_221_992_687_346_817_534_817_626_804_975_463_790_541_119_213_691_899n);
t.is(fnv1a(bytes, {size: 512}), 7_892_563_648_106_928_388_641_744_747_901_962_995_816_211_260_805_030_760_135_011_933_811_709_338_702_442_123_338_016_979_459_597_105_834_714_497_783_048_560_046_644_182_143_206_509_375_819_400_532_849_111n);
t.is(fnv1a(bytes, {size: 1024}), 162_599_568_807_828_018_278_740_454_090_851_618_076_261_791_243_547_429_330_845_926_617_440_124_701_815_376_483_262_958_546_407_611_470_083_720_486_420_160_817_850_263_303_428_987_405_974_668_389_046_941_240_548_898_833_919_126_704_680_456_253_506_816_487_407_186_600_714_845_619_389_901_326_326_498_663_678_676_823_405_702_541_932_736_634_507_371_229_190_999_806_123_793_839_783_784_715_844_873_833n);
});

// Covers zero-length byte input, hashed once per supported size.
test('hashing an empty Uint8Array', t => {
const bytes = new Uint8Array(0);

t.is(fnv1a(bytes, {size: 32}), 2_166_136_261n);
t.is(fnv1a(bytes, {size: 64}), 14_695_981_039_346_656_037n);
t.is(fnv1a(bytes, {size: 128}), 144_066_263_297_769_815_596_495_629_667_062_367_629n);
t.is(fnv1a(bytes, {size: 256}), 100_029_257_958_052_580_907_070_968_620_625_704_837_092_796_014_241_193_945_225_284_501_741_471_925_557n);
t.is(fnv1a(bytes, {size: 512}), 9_659_303_129_496_669_498_009_435_400_716_310_466_090_418_745_672_637_896_108_374_329_434_462_657_994_582_932_197_716_438_449_813_051_892_206_539_805_784_495_328_239_340_083_876_191_928_701_583_869_517_785n);
t.is(fnv1a(bytes, {size: 1024}), 14_197_795_064_947_621_068_722_070_641_403_218_320_880_622_795_441_933_960_878_474_914_617_582_723_252_296_732_303_717_722_150_864_096_521_202_355_549_365_628_174_669_108_571_814_760_471_015_076_148_029_755_969_804_077_320_157_692_458_563_003_215_304_957_150_157_403_644_460_363_550_505_412_711_285_966_361_610_267_868_082_893_823_963_790_439_336_411_086_884_584_107_735_010_676_915n);
});

0 comments on commit edb1546

Please sign in to comment.