106 changes: 37 additions & 69 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15164,6 +15164,10 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
BoUpSLP::ValueSet VectorizedStores;
bool Changed = false;

// Holds the (first_store, last_store) pairs that delimit the store ranges for
// which vectorization has already been attempted. This lets us skip ranges
// that were tried before without success.
DenseSet<std::pair<Value *, Value *>> TriedSequences;
struct StoreDistCompare {
bool operator()(const std::pair<unsigned, int> &Op1,
const std::pair<unsigned, int> &Op2) const {
Expand Down Expand Up @@ -15205,10 +15209,8 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
Type *ValueTy = StoreTy;
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
ValueTy = Trunc->getSrcTy();
unsigned MinVF = std::max<unsigned>(
2, PowerOf2Ceil(TTI->getStoreMinimumVF(
R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy,
ValueTy)));
unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF(
R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy, ValueTy));

if (MaxVF < MinVF) {
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
Expand All @@ -15234,74 +15236,40 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
VF = Size > MaxVF ? NonPowerOf2VF : Size;
Size *= 2;
});
unsigned End = Operands.size();
unsigned Repeat = 0;
constexpr unsigned MaxAttempts = 2;
SmallBitVector Range(Operands.size());
while (true) {
++Repeat;
for (unsigned Size : CandidateVFs) {
int StartIdx = Range.find_first_unset();
while (StartIdx != -1) {
int EndIdx = Range.find_next(StartIdx);
unsigned Sz = EndIdx == -1 ? End : EndIdx;
for (unsigned Cnt = StartIdx; Cnt + Size <= Sz;) {
ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
assert(all_of(Slice,
[&](Value *V) {
return cast<StoreInst>(V)
->getValueOperand()
->getType() ==
cast<StoreInst>(Slice.front())
->getValueOperand()
->getType();
}) &&
"Expected all operands of same type.");
if (vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
// Mark the vectorized stores so that we don't vectorize them
// again.
VectorizedStores.insert(Slice.begin(), Slice.end());
// Mark the vectorized stores so that we don't vectorize them
// again.
Changed = true;
// If we vectorized initial block, no need to try to vectorize
// it again.
Range.set(Cnt, Cnt + Size);
if (Cnt < StartIdx + MinVF)
Range.set(StartIdx, Cnt);
if (Cnt > EndIdx - Size - MinVF) {
Range.set(Cnt + Size, EndIdx);
End = Cnt;
}
Cnt += Size;
continue;
}
++Cnt;
}
if (Sz >= End)
break;
StartIdx = Range.find_next_unset(EndIdx);
unsigned StartIdx = 0;
for (unsigned Size : CandidateVFs) {
for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
assert(
all_of(
Slice,
[&](Value *V) {
return cast<StoreInst>(V)->getValueOperand()->getType() ==
cast<StoreInst>(Slice.front())
->getValueOperand()
->getType();
}) &&
"Expected all operands of same type.");
if (!VectorizedStores.count(Slice.front()) &&
!VectorizedStores.count(Slice.back()) &&
TriedSequences.insert(std::make_pair(Slice.front(), Slice.back()))
.second &&
vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
// Mark the vectorized stores so that we don't vectorize them again.
VectorizedStores.insert(Slice.begin(), Slice.end());
Changed = true;
// If we vectorized initial block, no need to try to vectorize it
// again.
if (Cnt == StartIdx)
StartIdx += Size;
Cnt += Size;
continue;
}
++Cnt;
}
// All values were vectorized - exit.
if (Range.all())
break;
// Check if tried all attempts or no need for the last attempts at all.
if (Repeat >= MaxAttempts)
break;
constexpr unsigned MaxVFScale = 4;
constexpr unsigned StoresLimit = 16;
const unsigned MaxTotalNum = std::min(
std::max<unsigned>(StoresLimit, MaxVFScale * MaxVF),
bit_floor(static_cast<unsigned>(Range.find_last_unset() -
Range.find_first_unset() + 1)));
if (MaxVF >= MaxTotalNum)
// Check if the whole array was vectorized already - exit.
if (StartIdx >= Operands.size())
break;
// Last attempt to vectorize max number of elements, if all previous
// attempts were unsuccessful because of the cost issues.
CandidateVFs.clear();
for (unsigned Size = MaxTotalNum; Size > MaxVF; Size /= 2)
CandidateVFs.push_back(Size);
}
}
};
Expand Down
582 changes: 354 additions & 228 deletions llvm/test/Analysis/CostModel/RISCV/rvv-cmp.ll

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/RISCV/rvv-select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ define void @select() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = select i1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = select i1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = select i1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = select i1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %19 = select i1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = select i1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %19 = select i1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = select <vscale x 1 x i1> undef, <vscale x 1 x i1> undef, <vscale x 1 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = select <vscale x 2 x i1> undef, <vscale x 2 x i1> undef, <vscale x 2 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = select <vscale x 4 x i1> undef, <vscale x 4 x i1> undef, <vscale x 4 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = select <vscale x 8 x i1> undef, <vscale x 8 x i1> undef, <vscale x 8 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = select <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %25 = select <vscale x 32 x i1> undef, <vscale x 32 x i1> undef, <vscale x 32 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = select <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = select <vscale x 32 x i1> undef, <vscale x 32 x i1> undef, <vscale x 32 x i1> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = select i1 undef, i8 undef, i8 undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = select i1 undef, <1 x i8> undef, <1 x i8> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = select i1 undef, <2 x i8> undef, <2 x i8> undef
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -437,14 +437,17 @@ define i8 @t17_oneuse(i8 %x) {
ret i8 %x.roundedup
}

; Bias is equal to the alignment-1 (as opposed to alignment),
; so we can just replace %x.roundedup with %x.biased.highbits
; Negative test: We can't replace with %x.biased.highbits because it is
; more poisonous.
define <2 x i4> @t18_replacement_0b0001(<2 x i4> %x) {
; CHECK-LABEL: @t18_replacement_0b0001(
; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], <i4 3, i4 3>
; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i4> [[X:%.*]], <i4 3, i4 3>
; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i4> [[X_LOWBITS]], zeroinitializer
; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X]], <i4 3, i4 3>
; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], <i4 -4, i4 poison>
; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]])
; CHECK-NEXT: ret <2 x i4> [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i4> [[X]], <2 x i4> [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: ret <2 x i4> [[X_ROUNDEDUP]]
;
%x.lowbits = and <2 x i4> %x, <i4 3, i4 3>
%x.lowbits.are.zero = icmp eq <2 x i4> %x.lowbits, <i4 0, i4 0>
Expand All @@ -454,12 +457,17 @@ define <2 x i4> @t18_replacement_0b0001(<2 x i4> %x) {
%x.roundedup = select <2 x i1> %x.lowbits.are.zero, <2 x i4> %x, <2 x i4> %x.biased.highbits
ret <2 x i4> %x.roundedup
}
; Negative test: We can't replace with %x.biased.highbits because it is
; more poisonous.
define <2 x i4> @t18_replacement_0b0010(<2 x i4> %x) {
; CHECK-LABEL: @t18_replacement_0b0010(
; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], <i4 3, i4 poison>
; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i4> [[X:%.*]], <i4 3, i4 3>
; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i4> [[X_LOWBITS]], zeroinitializer
; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X]], <i4 3, i4 poison>
; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], <i4 -4, i4 -4>
; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]])
; CHECK-NEXT: ret <2 x i4> [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i4> [[X]], <2 x i4> [[X_BIASED_HIGHBITS]]
; CHECK-NEXT: ret <2 x i4> [[X_ROUNDEDUP]]
;
%x.lowbits = and <2 x i4> %x, <i4 3, i4 3>
%x.lowbits.are.zero = icmp eq <2 x i4> %x.lowbits, <i4 0, i4 0>
Expand Down
46 changes: 35 additions & 11 deletions llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,41 @@ define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) {
define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) {
; SSE-LABEL: @store_i64(
; SSE-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> zeroinitializer
; SSE-NEXT: [[TMP8:%.*]] = mul <4 x i64> [[TMP5]], [[TMP7]]
; SSE-NEXT: [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], <i64 15, i64 15, i64 15, i64 15>
; SSE-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
; SSE-NEXT: [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], <i32 255, i32 255, i32 255, i32 255>
; SSE-NEXT: [[TMP12:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
; SSE-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>
; SSE-NEXT: [[TMP14:%.*]] = zext <4 x i32> [[TMP13]] to <4 x i64>
; SSE-NEXT: store <4 x i64> [[TMP14]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
; SSE-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
; SSE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], [[TMP4]]
; SSE-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP6]], 15
; SSE-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; SSE-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 255
; SSE-NEXT: [[TMP10:%.*]] = and i64 [[TMP7]], 4294967295
; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i64 [[TMP10]], i64 255
; SSE-NEXT: store i64 [[TMP11]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
; SSE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
; SSE-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 8, !tbaa [[TBAA5]]
; SSE-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP4]]
; SSE-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 15
; SSE-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
; SSE-NEXT: [[TMP17:%.*]] = icmp ult i32 [[TMP16]], 255
; SSE-NEXT: [[TMP18:%.*]] = and i64 [[TMP15]], 4294967295
; SSE-NEXT: [[TMP19:%.*]] = select i1 [[TMP17]], i64 [[TMP18]], i64 255
; SSE-NEXT: store i64 [[TMP19]], ptr [[TMP12]], align 8, !tbaa [[TBAA5]]
; SSE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
; SSE-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8, !tbaa [[TBAA5]]
; SSE-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], [[TMP4]]
; SSE-NEXT: [[TMP23:%.*]] = lshr i64 [[TMP22]], 15
; SSE-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
; SSE-NEXT: [[TMP25:%.*]] = icmp ult i32 [[TMP24]], 255
; SSE-NEXT: [[TMP26:%.*]] = and i64 [[TMP23]], 4294967295
; SSE-NEXT: [[TMP27:%.*]] = select i1 [[TMP25]], i64 [[TMP26]], i64 255
; SSE-NEXT: store i64 [[TMP27]], ptr [[TMP20]], align 8, !tbaa [[TBAA5]]
; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
; SSE-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP28]], align 8, !tbaa [[TBAA5]]
; SSE-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], [[TMP4]]
; SSE-NEXT: [[TMP31:%.*]] = lshr i64 [[TMP30]], 15
; SSE-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
; SSE-NEXT: [[TMP33:%.*]] = icmp ult i32 [[TMP32]], 255
; SSE-NEXT: [[TMP34:%.*]] = and i64 [[TMP31]], 4294967295
; SSE-NEXT: [[TMP35:%.*]] = select i1 [[TMP33]], i64 [[TMP34]], i64 255
; SSE-NEXT: store i64 [[TMP35]], ptr [[TMP28]], align 8, !tbaa [[TBAA5]]
; SSE-NEXT: ret void
;
; AVX-LABEL: @store_i64(
Expand Down
74 changes: 41 additions & 33 deletions llvm/test/tools/llvm-readobj/ELF/relr-relocs.test
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
## This is a test to test how SHT_RELR sections are dumped.

# RUN: yaml2obj --docnum=1 %s -o %t1
# RUN: llvm-objcopy --strip-all %t1 %t1.stripped
# RUN: llvm-readobj --relocations --raw-relr %t1 \
# RUN: | FileCheck --check-prefix=RAW-LLVM1 %s
# RAW-LLVM1: Section (1) .relr.dyn {
Expand Down Expand Up @@ -51,9 +50,9 @@
# RUN: llvm-readelf --relocations %t1 | FileCheck --check-prefix=GNU1 --match-full-lines --strict-whitespace %s
# GNU1:Relocation section '.relr.dyn' at offset 0x40 contains 21 entries:
# GNU1-NEXT:Index: Entry Address Symbolic Address
# GNU1-NEXT:0000: 0000000000010d60 0000000000010d60 base + 0x60
# GNU1-NEXT:0001: 0000000000000103 0000000000010d68 base + 0x68
# GNU1-NEXT: 0000000000010da0 base + 0xa0
# GNU1-NEXT:0000: 0000000000010d60 0000000000010d60
# GNU1-NEXT:0001: 0000000000000103 0000000000010d68
# GNU1-NEXT: 0000000000010da0 base + 0x30
# GNU1-NEXT:0002: 0000000000020000 0000000000020000 foo
# GNU1-NEXT:0003: 00000000000f0501 0000000000020040 foo + 0x40
# GNU1-NEXT: 0000000000020050 foo + 0x50
Expand All @@ -74,6 +73,7 @@
# GNU1-NEXT: 0000000000020390 bar + 0x190

## The addresses are not symbolized in the absence of .symtab.
# RUN: llvm-objcopy --strip-all %t1 %t1.stripped
# RUN: llvm-readelf --relocations %t1.stripped | FileCheck --check-prefix=GNU1S --match-full-lines --strict-whitespace %s
# GNU1S:Relocation section '.relr.dyn' at offset 0x40 contains 21 entries:
# GNU1S-NEXT:Index: Entry Address Symbolic Address
Expand All @@ -93,12 +93,17 @@ Sections:
Entries: [ 0x0000000000010D60, 0x0000000000000103, 0x0000000000020000,
0x00000000000F0501, 0x000A700550400009 ]
Symbols:
- Name: bar
Value: 0x20200
- Name: foo
Value: 0x20000
- Name: base
Value: 0x10d00
- Name: bar
Value: 0x20200
Section: .relr.dyn
- Name: foo
Value: 0x20000
Section: .relr.dyn
- Name: base
Value: 0x10d70
Section: .relr.dyn
- Name: ignored
Value: 0x20210

# RUN: yaml2obj --docnum=2 %s -o %t2
# RUN: llvm-readobj --relocations --raw-relr %t2 | \
Expand Down Expand Up @@ -140,23 +145,23 @@ Symbols:
# RAW-GNU2-NEXT: 000f0501
# RAW-GNU2-NEXT: 50400009

# RUN: llvm-readelf --relocations %t2 | FileCheck --check-prefix=GNU2 %s
# GNU2: Relocation section '.relr.dyn' at offset 0x34 contains 14 entries:
# GNU2-NEXT: Index: Entry Address Symbolic Address
# GNU2-NEXT: 0000: 00010d60 00010d60 .relr.dyn
# GNU2-NEXT: 0001: 00000103 00010d64 .relr.dyn + 0x4
# GNU2-NEXT: 00010d80 .relr.dyn + 0x20
# GNU2-NEXT: 0002: 00020000 00020000 .relr.dyn + 0xf2a0
# GNU2-NEXT: 0003: 000f0501 00020020 .relr.dyn + 0xf2c0
# GNU2-NEXT: 00020028 .relr.dyn + 0xf2c8
# GNU2-NEXT: 00020040 .relr.dyn + 0xf2e0
# GNU2-NEXT: 00020044 .relr.dyn + 0xf2e4
# GNU2-NEXT: 00020048 .relr.dyn + 0xf2e8
# GNU2-NEXT: 0002004c .relr.dyn + 0xf2ec
# GNU2-NEXT: 0004: 50400009 00020088 .relr.dyn + 0xf328
# GNU2-NEXT: 000200d4 .relr.dyn + 0xf374
# GNU2-NEXT: 000200ec .relr.dyn + 0xf38c
# GNU2-NEXT: 000200f4 .relr.dyn + 0xf394
# RUN: llvm-readelf --relocations %t2 | FileCheck --check-prefix=GNU2 --match-full-lines --strict-whitespace %s
# GNU2:Relocation section '.relr.dyn' at offset 0x34 contains 14 entries:
# GNU2-NEXT:Index: Entry Address Symbolic Address
# GNU2-NEXT:0000: 00010d60 00010d60 .relr.dyn
# GNU2-NEXT:0001: 00000103 00010d64 .relr.dyn + 0x4
# GNU2-NEXT: 00010d80 .relr.dyn + 0x20
# GNU2-NEXT:0002: 00020000 00020000 .relr.dyn + 0xf2a0
# GNU2-NEXT:0003: 000f0501 00020020 .relr.dyn + 0xf2c0
# GNU2-NEXT: 00020028 .relr.dyn + 0xf2c8
# GNU2-NEXT: 00020040 .relr.dyn + 0xf2e0
# GNU2-NEXT: 00020044 .relr.dyn + 0xf2e4
# GNU2-NEXT: 00020048 .relr.dyn + 0xf2e8
# GNU2-NEXT: 0002004c .relr.dyn + 0xf2ec
# GNU2-NEXT:0004: 50400009 00020088 .relr.dyn + 0xf328
# GNU2-NEXT: 000200d4 .relr.dyn + 0xf374
# GNU2-NEXT: 000200ec .relr.dyn + 0xf38c
# GNU2-NEXT: 000200f4 .relr.dyn + 0xf394

--- !ELF
FileHeader:
Expand Down Expand Up @@ -240,12 +245,15 @@ Symbols:
# RUN: llvm-readelf --relocations --raw-relr %t2.has.link 2>&1 | \
# RUN: FileCheck -DFILE=%t2.has.link --check-prefix=RAW-GNU2 %s

# RUN: yaml2obj --docnum=3 %s -o %t3
# RUN: llvm-readelf -r %t3 2>&1 | FileCheck -DFILE=%t3 --check-prefix=GNU3 %s

## .symtab is invalid. Check we report a warning and print entries without symbolization.
# RUN: yaml2obj --docnum=3 -DENTSIZE=1 %s -o %t3.err1
# RUN: llvm-readelf -r %t3.err1 2>&1 | FileCheck -DFILE=%t3.err1 --check-prefixes=GNU3,GNU3-ERR1 %s
# RUN: yaml2obj --docnum=3 -DLINK=0xff %s -o %t3.err2
# RUN: llvm-readelf -r %t3.err2 2>&1 | FileCheck -DFILE=%t3.err2 --check-prefixes=GNU3,GNU3-ERR2 %s

# GNU3: Index: Entry Address Symbolic Address
# GNU3-NEXT: warning: '[[FILE]]': section [index 2] has invalid sh_entsize: expected 24, but got 1
# GNU3-ERR1-NEXT: warning: '[[FILE]]': unable to read symbols from the SHT_SYMTAB section: section [index 2] has invalid sh_entsize: expected 24, but got 1
# GNU3-ERR2-NEXT: warning: '[[FILE]]': unable to get the string table for the SHT_SYMTAB section: invalid section index: 255
# GNU3-NEXT: 0000: 0000000000010d60 0000000000010d60
# GNU3-NEXT: 0001: 0000000000000103 0000000000010d68
# GNU3-NEXT: 0000000000010da0
Expand All @@ -263,8 +271,8 @@ Sections:
Entries: [ 0x0000000000010D60, 0x0000000000000103 ]
- Name: .symtab
Type: SHT_SYMTAB
Link: .strtab
EntSize: 1
Link: [[LINK=.strtab]]
EntSize: [[ENTSIZE=0x18]]
Symbols:
- Name: bar
Value: 0x10D60
62 changes: 38 additions & 24 deletions llvm/tools/llvm-readobj/ELFDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ template <typename ELFT> class ELFDumper : public ObjDumper {
std::string getStaticSymbolName(uint32_t Index) const;
StringRef getDynamicString(uint64_t Value) const;

std::pair<Elf_Sym_Range, std::optional<StringRef>> getSymtabAndStrtab() const;
void printSymbolsHelper(bool IsDynamic, bool ExtraSymInfo) const;
std::string getDynamicEntry(uint64_t Type, uint64_t Value) const;

Expand Down Expand Up @@ -512,6 +513,28 @@ ELFDumper<ELFT>::getVersionTable(const Elf_Shdr &Sec, ArrayRef<Elf_Sym> *SymTab,
return *VersionsOrErr;
}

/// Fetches the symbols of the SHT_SYMTAB section together with the string
/// table associated with it.
///
/// Must only be called when DotSymtabSec is set. Failing to obtain either
/// piece is not fatal: a unique warning is reported and the corresponding
/// element of the returned pair is left as an empty symbol range or
/// std::nullopt respectively.
template <class ELFT>
std::pair<typename ELFDumper<ELFT>::Elf_Sym_Range, std::optional<StringRef>>
ELFDumper<ELFT>::getSymtabAndStrtab() const {
  assert(DotSymtabSec);

  // Query the string table first so that its warning (if any) is emitted
  // ahead of the one for the symbols.
  std::optional<StringRef> Names;
  Expected<StringRef> NamesOrErr = Obj.getStringTableForSymtab(*DotSymtabSec);
  if (!NamesOrErr)
    reportUniqueWarning(
        "unable to get the string table for the SHT_SYMTAB section: " +
        toString(NamesOrErr.takeError()));
  else
    Names = *NamesOrErr;

  // Start from an empty range; it is only replaced when the read succeeds.
  Elf_Sym_Range Symbols(nullptr, nullptr);
  Expected<Elf_Sym_Range> SymbolsOrErr = Obj.symbols(DotSymtabSec);
  if (!SymbolsOrErr)
    reportUniqueWarning("unable to read symbols from the SHT_SYMTAB section: " +
                        toString(SymbolsOrErr.takeError()));
  else
    Symbols = *SymbolsOrErr;

  return {Symbols, Names};
}

template <class ELFT>
void ELFDumper<ELFT>::printSymbolsHelper(bool IsDynamic,
bool ExtraSymInfo) const {
Expand All @@ -525,20 +548,7 @@ void ELFDumper<ELFT>::printSymbolsHelper(bool IsDynamic,
Syms = dynamic_symbols();
Entries = Syms.size();
} else if (DotSymtabSec) {
if (Expected<StringRef> StrTableOrErr =
Obj.getStringTableForSymtab(*DotSymtabSec))
StrTable = *StrTableOrErr;
else
reportUniqueWarning(
"unable to get the string table for the SHT_SYMTAB section: " +
toString(StrTableOrErr.takeError()));

if (Expected<Elf_Sym_Range> SymsOrErr = Obj.symbols(DotSymtabSec))
Syms = *SymsOrErr;
else
reportUniqueWarning(
"unable to read symbols from the SHT_SYMTAB section: " +
toString(SymsOrErr.takeError()));
std::tie(Syms, StrTable) = getSymtabAndStrtab();
Entries = DotSymtabSec->getEntityCount();
}
if (Syms.empty())
Expand Down Expand Up @@ -3955,19 +3965,21 @@ template <class ELFT> void GNUELFDumper<ELFT>::printRelr(const Elf_Shdr &Sec) {
else
OS << "Index: Entry Address Symbolic Address\n";

// If .symtab is available, collect its defined symbols and sort them by
// st_value.
SmallVector<std::pair<uint64_t, std::string>, 0> Syms;
if (this->DotSymtabSec) {
if (auto SymsOrErr = this->Obj.symbols(this->DotSymtabSec)) {
StringRef Strtab =
unwrapOrError(this->FileName,
this->Obj.getStringTableForSymtab(*this->DotSymtabSec));
for (auto [I, Sym] : enumerate(*SymsOrErr)) {
Elf_Sym_Range Symtab;
std::optional<StringRef> Strtab;
std::tie(Symtab, Strtab) = this->getSymtabAndStrtab();
if (Symtab.size() && Strtab) {
for (auto [I, Sym] : enumerate(Symtab)) {
if (!Sym.st_shndx)
continue;
Syms.emplace_back(Sym.st_value,
this->getFullSymbolName(Sym, I, ArrayRef<Elf_Word>(),
Strtab, false));
*Strtab, false));
}
} else {
this->reportUniqueWarning(SymsOrErr.takeError());
}
}
llvm::stable_sort(Syms);
Expand All @@ -3978,16 +3990,18 @@ template <class ELFT> void GNUELFDumper<ELFT>::printRelr(const Elf_Shdr &Sec) {
OS << format_hex_no_prefix(Where, ELFT::Is64Bits ? 16 : 8);
for (; I < Syms.size() && Syms[I].first <= Where; ++I)
;
// Try symbolizing the address. Find the nearest symbol before or at the
// address and print the symbol and the address difference.
if (I) {
OS << " " << Syms[I - 1].second;
if (Syms[I - 1].first < Where)
OS << " + 0x" << Twine::utohexstr(Where - Syms[I - 1].first);
}
OS << '\n';
};
for (auto [I, R] : enumerate(*RangeOrErr)) {
for (auto [Index, R] : enumerate(*RangeOrErr)) {
typename ELFT::uint Entry = R;
OS << formatv("{0:4}: ", I)
OS << formatv("{0:4}: ", Index)
<< format_hex_no_prefix(Entry, ELFT::Is64Bits ? 16 : 8) << ' ';
if ((Entry & 1) == 0) {
Print(Entry);
Expand Down
4 changes: 2 additions & 2 deletions polly/lib/Analysis/DependenceInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -951,8 +951,8 @@ class DependenceInfoPrinterLegacyPass final : public ScopPass {
bool runOnScop(Scop &S) override {
DependenceInfo &P = getAnalysis<DependenceInfo>();

OS << "Printing analysis '" << P.getPassName() << "' for " << "region: '"
<< S.getRegion().getNameStr() << "' in function '"
OS << "Printing analysis '" << P.getPassName() << "' for "
<< "region: '" << S.getRegion().getNameStr() << "' in function '"
<< S.getFunction().getName() << "':\n";
P.printScop(OS, S);

Expand Down
7 changes: 4 additions & 3 deletions polly/lib/Analysis/ScopBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2715,9 +2715,10 @@ void ScopBuilder::addUserContext() {
if (NameContext != NameUserContext) {
std::string SpaceStr = stringFromIslObj(Space, "null");
errs() << "Error: the name of dimension " << i
<< " provided in -polly-context " << "is '" << NameUserContext
<< "', but the name in the computed " << "context is '"
<< NameContext << "'. Due to this name mismatch, "
<< " provided in -polly-context "
<< "is '" << NameUserContext << "', but the name in the computed "
<< "context is '" << NameContext
<< "'. Due to this name mismatch, "
<< "the -polly-context option is ignored. Please provide "
<< "the context in the parameter space: " << SpaceStr << ".\n";
return;
Expand Down