Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64] Return the correct size for TLSDESC_CALLSEQ
The branch relaxation pass is computing the wrong offsets because it assumes TLSDESC_CALLSEQ eats up 4 bytes, when in fact it is lowered to an instruction sequence taking up 16 bytes. This can become a problem in huge files with lots of TLS accesses, as it may slowly move branch targets out of the range computed by the branch relaxation pass. Fixes PR24234 https://llvm.org/bugs/show_bug.cgi?id=24234 Differential Revision: https://reviews.llvm.org/D22870 llvm-svn: 277331
- Loading branch information
Showing
2 changed files
with
87 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
84 changes: 84 additions & 0 deletions
84
llvm/test/CodeGen/MIR/AArch64/inst-size-tlsdesc-callseq.mir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
# RUN: llc -mtriple=aarch64-unknown -run-pass aarch64-branch-relax -aarch64-tbz-offset-bits=4 %s -o - | FileCheck %s | ||
--- | | ||
; ModuleID = 'test.ll' | ||
source_filename = "test.ll" | ||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" | ||
target triple = "aarch64-unknown" | ||
|
||
@ThreadLocalGlobal = external thread_local local_unnamed_addr global i32, align 8 | ||
|
||
define i32 @test_tlsdesc_callseq_length(i32 %in) { | ||
%val = and i32 %in, 1 | ||
%tst = icmp eq i32 %val, 0 | ||
br i1 %tst, label %true, label %false | ||
|
||
true: ; preds = %0 | ||
%1 = load i32, i32* @ThreadLocalGlobal, align 8 | ||
ret i32 %1 | ||
|
||
false: ; preds = %0 | ||
ret i32 0 | ||
} | ||
|
||
... | ||
--- | ||
# CHECK-LABEL: name:{{.*}}test_tlsdesc_callseq_length | ||
# If the size of TLSDESC_CALLSEQ is computed correctly, that will push | ||
# the bb.2.false block too far away from the TBNZW, so the branch will | ||
# have to be relaxed (note that we're using -aarch64-tbz-offset-bits to | ||
# constrain the range that can be reached with the TBNZW to something smaller | ||
# than what TLSDESC_CALLSEQ is lowered to). | ||
# CHECK: TBZW killed %w0, 0, %bb.1.true | ||
# CHECK: B %bb.2.false | ||
name: test_tlsdesc_callseq_length | ||
alignment: 2 | ||
exposesReturnsTwice: false | ||
hasInlineAsm: false | ||
allVRegsAllocated: true | ||
isSSA: false | ||
tracksRegLiveness: false | ||
tracksSubRegLiveness: false | ||
liveins: | ||
- { reg: '%w0' } | ||
frameInfo: | ||
isFrameAddressTaken: false | ||
isReturnAddressTaken: false | ||
hasStackMap: false | ||
hasPatchPoint: false | ||
stackSize: 16 | ||
offsetAdjustment: 0 | ||
maxAlignment: 16 | ||
adjustsStack: false | ||
hasCalls: true | ||
maxCallFrameSize: 0 | ||
hasOpaqueSPAdjustment: false | ||
hasVAStart: false | ||
hasMustTailInVarArgFunc: false | ||
stack: | ||
- { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '%lr' } | ||
body: | | ||
bb.0 (%ir-block.0): | ||
successors: %bb.1.true, %bb.2.false | ||
liveins: %w0, %lr | ||
TBNZW killed %w0, 0, %bb.2.false | ||
bb.1.true: | ||
liveins: %lr | ||
early-clobber %sp = frame-setup STRXpre killed %lr, %sp, -16 :: (store 8 into %stack.0) | ||
frame-setup CFI_INSTRUCTION def_cfa_offset 16 | ||
frame-setup CFI_INSTRUCTION offset %w30, -16 | ||
TLSDESC_CALLSEQ target-flags(aarch64-tls) @ThreadLocalGlobal, implicit-def dead %lr, implicit-def %x0, implicit-def dead %x1 | ||
%x8 = MRS 56962 | ||
%w0 = LDRWroX killed %x8, killed %x0, 0, 0 :: (load 4 from @ThreadLocalGlobal, align 8) | ||
early-clobber %sp, %lr = LDRXpost %sp, 16 :: (load 8 from %stack.0) | ||
RET killed %lr, implicit killed %w0 | ||
bb.2.false: | ||
liveins: %lr | ||
%w0 = ORRWrs %wzr, %wzr, 0 | ||
RET killed %lr, implicit killed %w0 | ||
... |