Skip to content

Commit

Permalink
Syndicate, test and fix base64 implementation
Browse files Browse the repository at this point in the history
Move Base64 implementation from clangd/SemanticHighlighting to
llvm/Support/Base64, fix its implementation and provide a decent test suite.

The previous implementation used the + operator instead of | to combine the
shifted bytes, which is a problem when the bytes are signed. A char holding
0xFF is implicitly promoted to a (signed) int with value -1, so shifting it
left by 16 results in 0xffff0000, which is negative. Combining negative
numbers with a + in that context is not what we want to do.

This fixes #149.

Differential Revision: https://reviews.llvm.org/D75057
  • Loading branch information
serge-sans-paille committed Mar 2, 2020
1 parent c443b61 commit 5a1958f
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 48 deletions.
32 changes: 1 addition & 31 deletions clang-tools-extra/clangd/SemanticHighlighting.cpp
Expand Up @@ -26,6 +26,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Base64.h"
#include "llvm/Support/Casting.h"
#include <algorithm>

Expand Down Expand Up @@ -283,37 +284,6 @@ class CollectExtraHighlightings
HighlightingsBuilder &H;
};

// Encode binary data into base64, using the alphabet in Table and '=' as
// padding so that the output length is always a multiple of four.
// This was copied from compiler-rt/lib/fuzzer/FuzzerUtil.cpp.
// FIXME: Factor this out into llvm/Support?
//
// Bytes holds plain `char`, which may be a signed type. Every byte is therefore
// widened through `unsigned char` before it is shifted: promoting a negative
// char directly would sign-extend it and corrupt the neighbouring bits of the
// 24-bit group.
std::string encodeBase64(const llvm::SmallVectorImpl<char> &Bytes) {
  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                              "abcdefghijklmnopqrstuvwxyz"
                              "0123456789+/";
  // Reads one input byte as a value in [0, 255], without sign extension.
  auto Byte = [&Bytes](size_t Idx) -> uint32_t {
    return static_cast<unsigned char>(Bytes[Idx]);
  };

  std::string Res;
  // Four output characters per (possibly partial) group of three input bytes.
  Res.reserve(((Bytes.size() + 2) / 3) * 4);

  size_t I;
  // Complete 3-byte groups: pack into 24 bits, emit four 6-bit indices.
  for (I = 0; I + 2 < Bytes.size(); I += 3) {
    const uint32_t X = (Byte(I) << 16) | (Byte(I + 1) << 8) | Byte(I + 2);
    Res += Table[(X >> 18) & 63];
    Res += Table[(X >> 12) & 63];
    Res += Table[(X >> 6) & 63];
    Res += Table[X & 63];
  }
  if (I + 1 == Bytes.size()) {
    // One trailing byte: two data characters followed by two pads.
    const uint32_t X = Byte(I) << 16;
    Res += Table[(X >> 18) & 63];
    Res += Table[(X >> 12) & 63];
    Res += "==";
  } else if (I + 2 == Bytes.size()) {
    // Two trailing bytes: three data characters followed by one pad.
    const uint32_t X = (Byte(I) << 16) | (Byte(I + 1) << 8);
    Res += Table[(X >> 18) & 63];
    Res += Table[(X >> 12) & 63];
    Res += Table[(X >> 6) & 63];
    Res += "=";
  }
  return Res;
}

void write32be(uint32_t I, llvm::raw_ostream &OS) {
std::array<char, 4> Buf;
llvm::support::endian::write32be(Buf.data(), I);
Expand Down
38 changes: 21 additions & 17 deletions compiler-rt/lib/fuzzer/FuzzerUtil.cpp
Expand Up @@ -151,32 +151,36 @@ bool ParseDictionaryFile(const std::string &Text, Vector<Unit> *Units) {
return true;
}

// Code duplicated (and tested) in llvm/include/llvm/Support/Base64.h
// Returns the base64 encoding of the bytes in U, using the alphabet in Table
// and '=' as padding.
// NOTE(review): this span shows two variants of the body interleaved -- one
// that appends to `Res` and one that writes into a pre-sized `Buffer` -- so the
// braces do not balance and it is not compilable exactly as shown. Confirm
// which lines belong to the revision you are editing before changing code.
std::string Base64(const Unit &U) {
static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
std::string Res;
size_t i;
for (i = 0; i + 2 < U.size(); i += 3) {
uint32_t x = (U[i] << 16) + (U[i + 1] << 8) + U[i + 2];
Res += Table[(x >> 18) & 63];
Res += Table[(x >> 12) & 63];
Res += Table[(x >> 6) & 63];
Res += Table[x & 63];
std::string Buffer;
// Four output characters per (possibly partial) group of three input bytes.
Buffer.resize(((U.size() + 2) / 3) * 4);

size_t i = 0, j = 0;
// Complete 3-byte groups: pack into 24 bits, emit four 6-bit table indices.
for (size_t n = U.size() / 3 * 3; i < n; i += 3, j += 4) {
uint32_t x = (U[i] << 16) | (U[i + 1] << 8) | U[i + 2];
Buffer[j + 0] = Table[(x >> 18) & 63];
Buffer[j + 1] = Table[(x >> 12) & 63];
Buffer[j + 2] = Table[(x >> 6) & 63];
Buffer[j + 3] = Table[x & 63];
}
// One trailing byte: two data characters followed by two '=' pads.
if (i + 1 == U.size()) {
uint32_t x = (U[i] << 16);
Res += Table[(x >> 18) & 63];
Res += Table[(x >> 12) & 63];
Res += "==";
Buffer[j + 0] = Table[(x >> 18) & 63];
Buffer[j + 1] = Table[(x >> 12) & 63];
Buffer[j + 2] = '=';
Buffer[j + 3] = '=';
// Two trailing bytes: three data characters followed by one '=' pad.
} else if (i + 2 == U.size()) {
uint32_t x = (U[i] << 16) + (U[i + 1] << 8);
Res += Table[(x >> 18) & 63];
Res += Table[(x >> 12) & 63];
Res += Table[(x >> 6) & 63];
Res += "=";
uint32_t x = (U[i] << 16) | (U[i + 1] << 8);
Buffer[j + 0] = Table[(x >> 18) & 63];
Buffer[j + 1] = Table[(x >> 12) & 63];
Buffer[j + 2] = Table[(x >> 6) & 63];
Buffer[j + 3] = '=';
}
return Res;
return Buffer;
}

static std::mutex SymbolizeMutex;
Expand Down
53 changes: 53 additions & 0 deletions llvm/include/llvm/Support/Base64.h
@@ -0,0 +1,53 @@
//===--- Base64.h - Base64 Encoder/Decoder ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides generic base64 encoder/decoder.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_BASE64_H
#define LLVM_SUPPORT_BASE64_H

#include <cstddef>
#include <cstdint>
#include <string>

namespace llvm {

/// Encodes \p Bytes as base64, using the alphabet in Table and '=' as padding.
///
/// \p InputBytes must provide `size()` and `operator[]` yielding byte-sized
/// values (e.g. std::string, StringRef, a vector of char or uint8_t).
///
/// \returns a string of exactly `((Bytes.size() + 2) / 3) * 4` characters.
template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                              "abcdefghijklmnopqrstuvwxyz"
                              "0123456789+/";
  // Reads one input byte as a value in [0, 255]. The element type may be a
  // signed char; promoting a negative value directly would sign-extend it, so
  // that OR-ing the shifted bytes together would set bits belonging to the
  // higher bytes of the group (and shifting a negative int is undefined before
  // C++20).
  auto Byte = [&Bytes](size_t Idx) -> uint32_t {
    return static_cast<unsigned char>(Bytes[Idx]);
  };

  std::string Buffer;
  // Four output characters per (possibly partial) group of three input bytes.
  Buffer.resize(((Bytes.size() + 2) / 3) * 4);

  size_t i = 0, j = 0;
  // Complete 3-byte groups: pack into 24 bits, emit four 6-bit indices.
  for (size_t n = Bytes.size() / 3 * 3; i < n; i += 3, j += 4) {
    const uint32_t x = (Byte(i) << 16) | (Byte(i + 1) << 8) | Byte(i + 2);
    Buffer[j + 0] = Table[(x >> 18) & 63];
    Buffer[j + 1] = Table[(x >> 12) & 63];
    Buffer[j + 2] = Table[(x >> 6) & 63];
    Buffer[j + 3] = Table[x & 63];
  }
  if (i + 1 == Bytes.size()) {
    // One trailing byte: two data characters followed by two pads.
    const uint32_t x = Byte(i) << 16;
    Buffer[j + 0] = Table[(x >> 18) & 63];
    Buffer[j + 1] = Table[(x >> 12) & 63];
    Buffer[j + 2] = '=';
    Buffer[j + 3] = '=';
  } else if (i + 2 == Bytes.size()) {
    // Two trailing bytes: three data characters followed by one pad.
    const uint32_t x = (Byte(i) << 16) | (Byte(i + 1) << 8);
    Buffer[j + 0] = Table[(x >> 18) & 63];
    Buffer[j + 1] = Table[(x >> 12) & 63];
    Buffer[j + 2] = Table[(x >> 6) & 63];
    Buffer[j + 3] = '=';
  }
  return Buffer;
}

} // end namespace llvm

#endif
53 changes: 53 additions & 0 deletions llvm/unittests/Support/Base64Test.cpp
@@ -0,0 +1,53 @@
//===- llvm/unittest/Support/Base64Test.cpp - Base64 tests ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements unit tests for the Base64 functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/Base64.h"
#include "llvm/ADT/StringRef.h"
#include "gtest/gtest.h"

using namespace llvm;

namespace {
/// Encodes \p Input with encodeBase64 and expects the outcome to compare equal
/// to \p Final.
void TestBase64(StringRef Input, StringRef Final) {
  const std::string Res = encodeBase64(Input);
  EXPECT_EQ(Res, Final);
}

} // namespace

// Checks encodeBase64 against the RFC 4648 test vectors, a buffer containing
// NUL and high-bit bytes, and a longer input.
TEST(Base64Test, Base64) {
  // from: https://tools.ietf.org/html/rfc4648#section-10
  TestBase64("", "");
  TestBase64("f", "Zg==");
  TestBase64("fo", "Zm8=");
  TestBase64("foo", "Zm9v");
  TestBase64("foob", "Zm9vYg==");
  TestBase64("fooba", "Zm9vYmE=");
  TestBase64("foobar", "Zm9vYmFy");

  // With non-printable values. The buffer contains embedded NUL bytes, so the
  // length is passed explicitly.
  char NonPrintableVector[] = {0x00, 0x00, 0x00, 0x46,
                               0x00, 0x08, (char)0xff, (char)0xee};
  TestBase64(StringRef(NonPrintableVector, sizeof(NonPrintableVector)),
             "AAAARgAI/+4=");

  // Large test case
  char LargeVector[] = {0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b,
                        0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f,
                        0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, 0x73, 0x20, 0x6f,
                        0x76, 0x65, 0x72, 0x20, 0x31, 0x33, 0x20, 0x6c, 0x61,
                        0x7a, 0x79, 0x20, 0x64, 0x6f, 0x67, 0x73, 0x2e};
  // LargeVector is not NUL-terminated, so it must not go through the implicit
  // `const char *` conversion (which measures the length with strlen and would
  // read past the end of the array). Pass the length explicitly instead.
  TestBase64(StringRef(LargeVector, sizeof(LargeVector)),
             "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIDEzIGxhenkgZG9ncy4=");
}
1 change: 1 addition & 0 deletions llvm/unittests/Support/CMakeLists.txt
Expand Up @@ -9,6 +9,7 @@ add_llvm_unittest(SupportTests
AnnotationsTest.cpp
ARMAttributeParser.cpp
ArrayRecyclerTest.cpp
Base64Test.cpp
BinaryStreamTest.cpp
BlockFrequencyTest.cpp
BranchProbabilityTest.cpp
Expand Down

0 comments on commit 5a1958f

Please sign in to comment.