Skip to content

Commit

Permalink
Revert "[clang] Speedup line offset mapping computation"
Browse files Browse the repository at this point in the history
This reverts commit 6951b72.
Breaks several bots, see comments on https://reviews.llvm.org/D99409
  • Loading branch information
nico committed Apr 7, 2021
1 parent 302e748 commit c22b09d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 54 deletions.
64 changes: 11 additions & 53 deletions clang/lib/Basic/SourceManager.cpp
Expand Up @@ -1252,21 +1252,12 @@ unsigned SourceManager::getPresumedColumnNumber(SourceLocation Loc,
return PLoc.getColumn();
}

// Check if multi-byte word x has any byte in the inclusive range [m, n]. This
// may also catch bytes equal to n + 1, so a non-zero result is only a *likely*
// match.
// The returned value holds a 0x80 at each byte position that holds a match,
// and is zero when no byte matches.
// All masks are derived from T rather than from `unsigned long`, so the result
// is correct even on targets where `unsigned long` is narrower than T.
// T is expected to be an unsigned integer type.
// see http://graphics.stanford.edu/~seander/bithacks.html#HasBetweenInWord
template <class T>
static constexpr inline T likelyhasbetween(T x, unsigned char m,
                                           unsigned char n) {
  // 0x01 replicated in every byte of T; multiplying it by a byte value
  // broadcasts that value to every byte position.
  const T Ones = static_cast<T>(~static_cast<T>(0)) / 255;
  return ((x - Ones * (n + 1)) & ~x &
          ((x & Ones * 127) + Ones * (127 - (m - 1)))) &
         (Ones * 128);
}
#ifdef __SSE2__
#include <emmintrin.h>
#endif

LineOffsetMapping LineOffsetMapping::get(llvm::MemoryBufferRef Buffer,
llvm::BumpPtrAllocator &Alloc) {

// Find the file offsets of all of the *physical* source lines. This does
// not look at trigraphs, escaped newlines, or anything else tricky.
SmallVector<unsigned, 256> LineOffsets;
Expand All @@ -1277,51 +1268,18 @@ LineOffsetMapping LineOffsetMapping::get(llvm::MemoryBufferRef Buffer,
const unsigned char *Buf = (const unsigned char *)Buffer.getBufferStart();
const unsigned char *End = (const unsigned char *)Buffer.getBufferEnd();
const std::size_t BufLen = End - Buf;

unsigned I = 0;
uint64_t Word;

// scan sizeof(Word) bytes at a time for new lines.
// This is much faster than scanning each byte independently.
if (BufLen > sizeof(Word)) {
do {
memcpy(&Word, Buf + I, sizeof(Word));
// no new line => jump over sizeof(Word) bytes.
auto Mask = likelyhasbetween(Word, '\n', '\r');
if (!Mask) {
I += sizeof(Word);
continue;
}

// At that point, Mask contains 0x80 set at each byte that holds a value
// in [\n, \r + 1 [

// Scan for the next newline - it's very likely there's one.
unsigned N =
llvm::countTrailingZeros(Mask) - 7; // -7 because 0x80 is the marker
Word >>= N;
I += N / 8 + 1;
unsigned char Byte = Word;
if (Byte == '\n') {
LineOffsets.push_back(I);
} else if (Byte == '\r') {
while (I < BufLen) {
// Use a fast check to catch both newlines
if (LLVM_UNLIKELY(Buf[I] <= std::max('\n', '\r'))) {
if (Buf[I] == '\n') {
LineOffsets.push_back(I + 1);
} else if (Buf[I] == '\r') {
// If this is \r\n, skip both characters.
if (Buf[I] == '\n')
if (I + 1 < BufLen && Buf[I + 1] == '\n')
++I;
LineOffsets.push_back(I);
LineOffsets.push_back(I + 1);
}
} while (I < BufLen - sizeof(Word) - 1);
}

// Handle tail using a regular check.
while (I < BufLen) {
if (Buf[I] == '\n') {
LineOffsets.push_back(I + 1);
} else if (Buf[I] == '\r') {
// If this is \r\n, skip both characters.
if (I + 1 < BufLen && Buf[I + 1] == '\n')
++I;
LineOffsets.push_back(I + 1);
}
++I;
}
Expand Down
Expand Up @@ -11,7 +11,7 @@

; RUN: sed -e "s,SRC_COMPDIR,./Inputs,g" %p/Inputs/source-interleave.ll > %t-relative-path.ll
; RUN: llc -o %t-relative-path.o -filetype=obj -mtriple=x86_64-pc-linux %t-relative-path.ll
; RUN: mkdir -p %t0 && cd %t0 && llvm-objdump --prefix myprefix --source %t-relative-path.o 2>&1 | \
; RUN: llvm-objdump --prefix myprefix --source %t-relative-path.o 2>&1 | \
; RUN: FileCheck %s --check-prefix=CHECK-BROKEN-PREFIX -DFILE=%t-relative-path.o -DPREFIX=. -DCOMPDIR=/Inputs
; CHECK-BROKEN-PREFIX: warning: '[[FILE]]': failed to find source [[PREFIX]][[COMPDIR]]{{[/\\]}}source-interleave-x86_64.c

Expand Down

0 comments on commit c22b09d

Please sign in to comment.