Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Support] Introduce the BLAKE3 hashing function implementation
BLAKE3 is a cryptographic hash function that is secure and very performant. The C implementation originates from https://github.com/BLAKE3-team/BLAKE3/tree/1.3.1/c and its license is at https://github.com/BLAKE3-team/BLAKE3/blob/1.3.1/LICENSE

This patch adds:
* `llvm/include/llvm-c/blake3.h`: The BLAKE3 C API
* `llvm/include/llvm/Support/BLAKE3.h`: C++ wrapper of the C API
* `llvm/lib/Support/BLAKE3`: Directory containing the BLAKE3 C implementation files, including the `LICENSE` file
* `llvm/unittests/Support/BLAKE3Test.cpp`: unit tests for the BLAKE3 C++ wrapper

This initial patch contains the pristine BLAKE3 sources; a follow-up patch will introduce LLVM-specific prefixes to avoid conflicts if a client also links with its own BLAKE3 version.

Here are some timings comparing BLAKE3 with LLVM's SHA1/SHA256/MD5. Timings include the `AVX512`, `AVX2`, `neon`, and generic/portable implementations. The table shows the speed-up multiplier of BLAKE3 for hashing 100 MB:

| Processor               | SHA1  | SHA256 | MD5  |
|-------------------------|-------|--------|------|
| Intel Xeon W (AVX512)   | 10.4x | 27x    | 9.4x |
| Intel Xeon W (AVX2)     | 6.5x  | 17x    | 5.9x |
| Intel Xeon W (portable) | 1.3x  | 3.3x   | 1.1x |
| M1Pro (neon)            | 2.1x  | 4.7x   | 2.8x |
| M1Pro (portable)        | 1.1x  | 2.4x   | 1.5x |

Differential Revision: https://reviews.llvm.org/D121510
- Loading branch information
Showing
31 changed files
with
31,785 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#ifndef BLAKE3_H | ||
#define BLAKE3_H | ||
|
||
#include <stddef.h> | ||
#include <stdint.h> | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
#define BLAKE3_VERSION_STRING "1.3.1" | ||
#define BLAKE3_KEY_LEN 32 | ||
#define BLAKE3_OUT_LEN 32 | ||
#define BLAKE3_BLOCK_LEN 64 | ||
#define BLAKE3_CHUNK_LEN 1024 | ||
#define BLAKE3_MAX_DEPTH 54 | ||
|
||
// This struct is a private implementation detail. It has to be here because | ||
// it's part of blake3_hasher below. | ||
// | ||
// The field notes below are inferred from names and sizes only; the code that | ||
// reads and writes these fields is not in this header. | ||
typedef struct { | ||
// Eight 32-bit words (32 bytes); presumably the chaining value - TODO confirm. | ||
uint32_t cv[8]; | ||
// Presumably the index of the chunk being processed - TODO confirm. | ||
uint64_t chunk_counter; | ||
// Staging buffer holding exactly one block (BLAKE3_BLOCK_LEN bytes). | ||
uint8_t buf[BLAKE3_BLOCK_LEN]; | ||
// Presumably the number of valid bytes currently held in buf - TODO confirm. | ||
uint8_t buf_len; | ||
// Presumably the count of blocks already compressed in this chunk - TODO confirm. | ||
uint8_t blocks_compressed; | ||
// Bit flags; their meaning is defined by the implementation, not this header. | ||
uint8_t flags; | ||
} blake3_chunk_state; | ||
|
||
// Incremental hasher state. A pointer to this struct is the first argument of | ||
// every blake3_hasher_* function declared in this header. | ||
typedef struct { | ||
// Eight 32-bit words, i.e. the same size as BLAKE3_KEY_LEN (32 bytes); | ||
// presumably the key words for this hasher - TODO confirm. | ||
uint32_t key[8]; | ||
// Embedded per-chunk state (see blake3_chunk_state). | ||
blake3_chunk_state chunk; | ||
// Presumably the number of entries currently stored in cv_stack - TODO confirm. | ||
uint8_t cv_stack_len; | ||
// The stack size is MAX_DEPTH + 1 because we do lazy merging. For example, | ||
// with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk | ||
// requires a 4th entry, rather than merging everything down to 1, because we | ||
// don't know whether more input is coming. This is different from how the | ||
// reference implementation does things. | ||
// Each entry occupies BLAKE3_OUT_LEN bytes. | ||
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN]; | ||
} blake3_hasher; | ||
|
||
// Returns a version string; presumably BLAKE3_VERSION_STRING - TODO confirm. | ||
const char *blake3_version(void); | ||
// Initializes (or reinitializes) the hasher state pointed to by self. | ||
void blake3_hasher_init(blake3_hasher *self); | ||
// Initializes self from a key of BLAKE3_KEY_LEN bytes. | ||
void blake3_hasher_init_keyed(blake3_hasher *self, | ||
const uint8_t key[BLAKE3_KEY_LEN]); | ||
// Initializes self from a context given as a C string; presumably the | ||
// string must be NUL-terminated since no length is passed - TODO confirm. | ||
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context); | ||
// Same as above, but the context is given as a pointer plus explicit length. | ||
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context, | ||
size_t context_len); | ||
// Digests input_len more bytes starting at input. | ||
void blake3_hasher_update(blake3_hasher *self, const void *input, | ||
size_t input_len); | ||
// Writes out_len bytes of output to out. self is const: finalizing does not | ||
// modify the hasher, so more input can be added and finalize called again. | ||
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out, | ||
size_t out_len); | ||
// Like blake3_hasher_finalize, with an extra seek argument; presumably a byte | ||
// offset into the output stream at which to start - TODO confirm. | ||
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek, | ||
uint8_t *out, size_t out_len); | ||
// Resets the hasher; how this differs from blake3_hasher_init is not visible | ||
// from this header - TODO confirm against the implementation. | ||
void blake3_hasher_reset(blake3_hasher *self); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
||
#endif /* BLAKE3_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
//==- BLAKE3.h - BLAKE3 C++ wrapper for LLVM ---------------------*- C++ -*-==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This is a C++ wrapper of the BLAKE3 C interface. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_SUPPORT_BLAKE3_H | ||
#define LLVM_SUPPORT_BLAKE3_H | ||
|
||
#include "llvm-c/blake3.h" | ||
#include "llvm/ADT/ArrayRef.h" | ||
#include "llvm/ADT/StringRef.h" | ||
|
||
namespace llvm { | ||
|
||
/// Fixed-size byte array that holds a BLAKE3 output of \p NumBytes bytes. | ||
/// | ||
/// The constant \p BLAKE3_OUT_LEN provides the default output length, | ||
/// 32 bytes, which is recommended for most callers. | ||
/// | ||
/// Outputs shorter than the default length of 32 bytes (256 bits) provide | ||
/// less security. An N-bit BLAKE3 output is intended to provide N bits of | ||
/// first and second preimage resistance and N/2 bits of collision | ||
/// resistance, for any N up to 256. Longer outputs don't provide any | ||
/// additional security. | ||
/// | ||
/// Shorter BLAKE3 outputs are prefixes of longer ones. Explicitly | ||
/// requesting a short output is equivalent to truncating the default-length | ||
/// output. | ||
template <size_t NumBytes = BLAKE3_OUT_LEN> | ||
using BLAKE3Result = std::array<uint8_t, NumBytes>; | ||
|
||
/// A class that wrap the BLAKE3 algorithm. | ||
class BLAKE3 { | ||
public: | ||
BLAKE3() { init(); } | ||
|
||
/// Reinitialize the internal state | ||
void init() { blake3_hasher_init(&Hasher); } | ||
|
||
/// Digest more data. | ||
void update(ArrayRef<uint8_t> Data) { | ||
blake3_hasher_update(&Hasher, Data.data(), Data.size()); | ||
} | ||
|
||
/// Digest more data. | ||
void update(StringRef Str) { | ||
blake3_hasher_update(&Hasher, Str.data(), Str.size()); | ||
} | ||
|
||
/// Finalize the hasher and put the result in \p Result. | ||
/// This doesn't modify the hasher itself, and it's possible to finalize again | ||
/// after adding more input. | ||
template <size_t NumBytes = BLAKE3_OUT_LEN> | ||
void final(BLAKE3Result<NumBytes> &Result) { | ||
blake3_hasher_finalize(&Hasher, Result.data(), Result.size()); | ||
} | ||
|
||
/// Finalize the hasher and return an output of any length, given in bytes. | ||
/// This doesn't modify the hasher itself, and it's possible to finalize again | ||
/// after adding more input. | ||
template <size_t NumBytes = BLAKE3_OUT_LEN> BLAKE3Result<NumBytes> final() { | ||
BLAKE3Result<NumBytes> Result; | ||
blake3_hasher_finalize(&Hasher, Result.data(), Result.size()); | ||
return Result; | ||
} | ||
|
||
/// Returns a BLAKE3 hash for the given data. | ||
template <size_t NumBytes = BLAKE3_OUT_LEN> | ||
static BLAKE3Result<NumBytes> hash(ArrayRef<uint8_t> Data) { | ||
BLAKE3 Hasher; | ||
Hasher.update(Data); | ||
return Hasher.final<NumBytes>(); | ||
} | ||
|
||
private: | ||
blake3_hasher Hasher; | ||
}; | ||
|
||
} // namespace llvm | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
DisableFormat: true | ||
SortIncludes: Never |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Collects the BLAKE3 source files for the current target into
# LLVM_BLAKE3_FILES and exports that list to the parent scope.

# Sources that are built for every target.
set(LLVM_BLAKE3_FILES
  BLAKE3/blake3.c
  BLAKE3/blake3_dispatch.c
  BLAKE3/blake3_portable.c
)

# The BLAKE3 team recommends using the assembly versions, from the README:
#
# "For each of the x86 SIMD instruction sets, four versions are available:
# three flavors of assembly (Unix, Windows MSVC, and Windows GNU) and one
# version using C intrinsics. The assembly versions are generally
# preferred. They perform better, they perform more consistently across
# different compilers, and they build more quickly."
#
# CMAKE_SYSTEM_PROCESSOR spells x86-64 differently per platform; the lowercase
# "amd64" spelling is accepted too so that such hosts also get the SIMD files.
# NOTE(review): confirm the set of spellings against the supported hosts.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|amd64)$")
  if (MSVC)
    list(APPEND LLVM_BLAKE3_FILES
      BLAKE3/blake3_sse2_x86-64_windows_msvc.asm
      BLAKE3/blake3_sse41_x86-64_windows_msvc.asm
      BLAKE3/blake3_avx2_x86-64_windows_msvc.asm
      BLAKE3/blake3_avx512_x86-64_windows_msvc.asm
    )
  elseif(WIN32)
    list(APPEND LLVM_BLAKE3_FILES
      BLAKE3/blake3_sse2_x86-64_windows_gnu.S
      BLAKE3/blake3_sse41_x86-64_windows_gnu.S
      BLAKE3/blake3_avx2_x86-64_windows_gnu.S
      BLAKE3/blake3_avx512_x86-64_windows_gnu.S
    )
  else()
    list(APPEND LLVM_BLAKE3_FILES
      BLAKE3/blake3_sse2_x86-64_unix.S
      BLAKE3/blake3_sse41_x86-64_unix.S
      BLAKE3/blake3_avx2_x86-64_unix.S
      BLAKE3/blake3_avx512_x86-64_unix.S
    )
  endif()
endif()

# Build the NEON implementation on 64-bit ARM only. A bare "arm" pattern does
# not match "aarch64" (the usual Linux spelling), and it does match 32-bit
# "armv7*" targets where NEON is not guaranteed to be available.
# NOTE(review): this assumes the bundled BLAKE3 enables NEON by default only
# on AArch64 - confirm against blake3_impl.h.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64)")
  list(APPEND LLVM_BLAKE3_FILES
    BLAKE3/blake3_neon.c
  )
endif()

# Re-export the list so the including CMakeLists.txt can see it.
set(LLVM_BLAKE3_FILES
  ${LLVM_BLAKE3_FILES}
  PARENT_SCOPE
)
Oops, something went wrong.