From d3e5b6f7539b86995aef6e2075c1edb3059385ce Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 24 Mar 2022 11:31:39 -0700 Subject: [PATCH] [ELF] Implement --build-id={md5,sha1} with truncated BLAKE3 --build-id was introduced as "approximation of true uniqueness across all binaries that might be used by overlapping sets of people". It does not require the attack resistance mentioned below. In practice, people just use --build-id=md5 for 16-byte build ID and --build-id=sha1 for 20-byte build ID. BLAKE3 has 256-bit key length, which provides 128-bit security against (second-)preimage, collision, and differentiability attacks. Its portable implementation is fast. It additionally provides Arm Neon/AVX2/AVX-512. Just implement --build-id={md5,sha1} with truncated BLAKE3. Linking clang 14 RelWithDebInfo with --threads=8 on a Skylake CPU: * 1.13x as fast with --build-id=md5 * 1.15x as fast with --build-id=sha1 --threads=4 on Apple m1: * 1.25x as fast with --build-id=md5 * 1.17x as fast with --build-id=sha1 Reviewed By: ikudrin Differential Revision: https://reviews.llvm.org/D121531 --- lld/ELF/Writer.cpp | 13 +++++++++---- lld/test/ELF/build-id.s | 8 ++++---- lld/test/ELF/partition-notes.s | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 7c596bb788f58e..153095e44b01a1 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -25,10 +25,9 @@ #include "lld/Common/Filesystem.h" #include "lld/Common/Strings.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Support/MD5.h" +#include "llvm/Support/BLAKE3.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/RandomNumberGenerator.h" -#include "llvm/Support/SHA1.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include @@ -2925,6 +2924,12 @@ template void Writer::writeBuildId() { MutableArrayRef output(buildId.get(), hashSize); llvm::ArrayRef input{Out::bufferStart, size_t(fileSize)}; + // Fedora introduced build ID as 
"approximation of true uniqueness across all + // binaries that might be used by overlapping sets of people". It does not + // need some security goals that some hash algorithms strive to provide, e.g. + // (second-)preimage and collision resistance. In practice people use 'md5' + // and 'sha1' just for different lengths. Implement them with the more + // efficient BLAKE3. switch (config->buildId) { case BuildIdKind::Fast: computeHash(output, input, [](uint8_t *dest, ArrayRef arr) { @@ -2933,12 +2938,12 @@ template void Writer::writeBuildId() { break; case BuildIdKind::Md5: computeHash(output, input, [&](uint8_t *dest, ArrayRef arr) { - memcpy(dest, MD5::hash(arr).data(), hashSize); + memcpy(dest, BLAKE3::hash<16>(arr).data(), hashSize); }); break; case BuildIdKind::Sha1: computeHash(output, input, [&](uint8_t *dest, ArrayRef arr) { - memcpy(dest, SHA1::hash(arr).data(), hashSize); + memcpy(dest, BLAKE3::hash<20>(arr).data(), hashSize); }); break; case BuildIdKind::Uuid: diff --git a/lld/test/ELF/build-id.s b/lld/test/ELF/build-id.s index 63e488ef6d294a..05a5e8d61f92e6 100644 --- a/lld/test/ELF/build-id.s +++ b/lld/test/ELF/build-id.s @@ -69,11 +69,11 @@ _start: # MD5: Contents of section .note.gnu.build-id: # MD5-NEXT: 04000000 10000000 03000000 474e5500 ............GNU. -# MD5-NEXT: 7b00fd9e 054ceb4b 06f64d0e 482cb476 +# MD5-NEXT: dbf0bc13 b3ff11e9 fde6e17c 0304983c # SHA1: Contents of section .note.gnu.build-id: # SHA1-NEXT: 04000000 14000000 03000000 474e5500 ............GNU. -# SHA1-NEXT: 221a99da dd1d2bf3 05e48a91 dde8a0cb +# SHA1-NEXT: 1215775f d3b60050 70afd970 e8a10972 # UUID: Contents of section .note.gnu.build-id: # UUID-NEXT: 04000000 10000000 03000000 474e5500 ............GNU. 
@@ -89,11 +89,11 @@ _start: # SEPARATE: Hex dump of section '.note.gnu.build-id': # SEPARATE-NEXT: 0x00200198 04000000 14000000 03000000 474e5500 -# SEPARATE-NEXT: 0x002001a8 96820adf d90d5470 0a0c32ff a88c4017 +# SEPARATE-NEXT: 0x002001a8 5cd067a4 2631c0fd 42029037 4b8e0938 # RUN: ld.lld --build-id=sha1 --no-rosegment %t -o %t2 # RUN: llvm-readelf -x .note.gnu.build-id %t2 | FileCheck --check-prefix=NORO %s # NORO: Hex dump of section '.note.gnu.build-id': # NORO-NEXT: 0x00200160 04000000 14000000 03000000 474e5500 -# NORO-NEXT: 0x00200170 cf6d7b3a 0b3297c3 5b47c079 ce048349 +# NORO-NEXT: 0x00200170 a328cc99 45bfc3fc a9fc8615 37102f9d diff --git a/lld/test/ELF/partition-notes.s b/lld/test/ELF/partition-notes.s index b02e490fff2181..9bc43f2fbf9ee4 100644 --- a/lld/test/ELF/partition-notes.s +++ b/lld/test/ELF/partition-notes.s @@ -37,7 +37,7 @@ // CHECK-NEXT: Owner: GNU // CHECK-NEXT: Data size: // CHECK-NEXT: Type: NT_GNU_BUILD_ID (unique build ID bitstring) -// CHECK-NEXT: Build ID: bb5542bd74252653e286044980d602874d237ae0 +// CHECK-NEXT: Build ID: ab81108a3d85b729980356331fddc2bfc4c10177{{$}} // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: ]