diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index da8447f91f366..9cf9679045c7b 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -88,10 +88,8 @@ class AArch64AsmPrinter : public AsmPrinter { void emitStartOfAsmFile(Module &M) override; void emitJumpTableInfo() override; - void emitJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, unsigned JTI); - void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI); + void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI); void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI); @@ -785,33 +783,25 @@ void AArch64AsmPrinter::emitJumpTableInfo() { emitAlignment(Align(Size)); OutStreamer->emitLabel(GetJTISymbol(JTI)); - for (auto *JTBB : JTBBs) - emitJumpTableEntry(MJTI, JTBB, JTI); - } -} + const MCSymbol *BaseSym = AArch64FI->getJumpTableEntryPCRelSymbol(JTI); + const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext); -void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, - unsigned JTI) { - const MCExpr *Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); - auto AFI = MF->getInfo(); - unsigned Size = AFI->getJumpTableEntrySize(JTI); + for (auto *JTBB : JTBBs) { + const MCExpr *Value = + MCSymbolRefExpr::create(JTBB->getSymbol(), OutContext); - if (Size == 4) { - // .word LBB - LJTI - const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); - const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, JTI, OutContext); - Value = MCBinaryExpr::createSub(Value, Base, OutContext); - } else { - // .byte (LBB - LBB) >> 2 (or .hword) - const MCSymbol *BaseSym = AFI->getJumpTableEntryPCRelSymbol(JTI); - const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext); - Value = MCBinaryExpr::createSub(Value, Base, OutContext); - Value = MCBinaryExpr::createLShr( - Value, MCConstantExpr::create(2, OutContext), OutContext); - } + // Each entry is: + // .byte/.hword (LBB - Lbase)>>2 + // or plain: + // .word LBB - Lbase + Value = MCBinaryExpr::createSub(Value, Base, OutContext); + if (Size != 4) + Value = MCBinaryExpr::createLShr( + Value, MCConstantExpr::create(2, OutContext), OutContext); - OutStreamer->emitValue(Value, Size); + OutStreamer->emitValue(Value, Size); + } + } } /// Small jump tables contain an unsigned byte or half, representing the offset @@ -822,9 +812,9 @@ void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI, /// /// adr xDest, .LBB0_0 /// ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh). -/// add xDest, xDest, xScratch, lsl #2 -void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer, - const llvm::MachineInstr &MI) { +/// add xDest, xDest, xScratch (with "lsl #2" for smaller entries) +void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer, + const llvm::MachineInstr &MI) { Register DestReg = MI.getOperand(0).getReg(); Register ScratchReg = MI.getOperand(1).getReg(); Register ScratchRegW = @@ -832,33 +822,50 @@ void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer, Register TableReg = MI.getOperand(2).getReg(); Register EntryReg = MI.getOperand(3).getReg(); int JTIdx = MI.getOperand(4).getIndex(); - bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8; + int Size = AArch64FI->getJumpTableEntrySize(JTIdx); // This has to be first because the compression pass based its reachability // calculations on the start of the JumpTableDest instruction. auto Label = MF->getInfo()->getJumpTableEntryPCRelSymbol(JTIdx); + + // If we don't already have a symbol to use as the base, use the ADR + // instruction itself. + if (!Label) { + Label = MF->getContext().createTempSymbol(); + AArch64FI->setJumpTableEntryInfo(JTIdx, Size, Label); + OutStreamer.emitLabel(Label); + } + + auto LabelExpr = MCSymbolRefExpr::create(Label, MF->getContext()); EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR) .addReg(DestReg) - .addExpr(MCSymbolRefExpr::create( - Label, MF->getContext()))); + .addExpr(LabelExpr)); // Load the number of instruction-steps to offset from the label. - unsigned LdrOpcode = IsByteEntry ? AArch64::LDRBBroX : AArch64::LDRHHroX; + unsigned LdrOpcode; + switch (Size) { + case 1: LdrOpcode = AArch64::LDRBBroX; break; + case 2: LdrOpcode = AArch64::LDRHHroX; break; + case 4: LdrOpcode = AArch64::LDRSWroX; break; + default: + llvm_unreachable("Unknown jump table size"); + } + EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode) - .addReg(ScratchRegW) + .addReg(Size == 4 ? ScratchReg : ScratchRegW) .addReg(TableReg) .addReg(EntryReg) .addImm(0) - .addImm(IsByteEntry ? 0 : 1)); + .addImm(Size == 1 ? 0 : 1)); - // Multiply the steps by 4 and add to the already materialized base label - // address. + // Add to the already materialized base label address, multiplying by 4 if + // compressed. EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs) .addReg(DestReg) .addReg(DestReg) .addReg(ScratchReg) - .addImm(2)); + .addImm(Size == 4 ? 0 : 2)); } void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, @@ -1256,30 +1263,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { return; } - case AArch64::JumpTableDest32: { - // We want: - // ldrsw xScratch, [xTable, xEntry, lsl #2] - // add xDest, xTable, xScratch - unsigned DestReg = MI->getOperand(0).getReg(), - ScratchReg = MI->getOperand(1).getReg(), - TableReg = MI->getOperand(2).getReg(), - EntryReg = MI->getOperand(3).getReg(); - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX) - .addReg(ScratchReg) - .addReg(TableReg) - .addReg(EntryReg) - .addImm(0) - .addImm(1)); - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs) - .addReg(DestReg) - .addReg(TableReg) - .addReg(ScratchReg) - .addImm(0)); - return; - } + case AArch64::JumpTableDest32: case AArch64::JumpTableDest16: case AArch64::JumpTableDest8: - LowerJumpTableDestSmall(*OutStreamer, *MI); + LowerJumpTableDest(*OutStreamer, *MI); return; case AArch64::FMOVH0: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 56533d5eadf78..711ee70150a5e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6256,6 +6256,9 @@ SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op, SDValue Entry = Op.getOperand(2); int JTI = cast(JT.getNode())->getIndex(); + auto *AFI = DAG.getMachineFunction().getInfo(); + AFI->setJumpTableEntryInfo(JTI, 4, nullptr); + SDNode *Dest = DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT, Entry, DAG.getTargetJumpTable(JTI, MVT::i32)); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 06e88b7b2045f..ee12563cb0936 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -692,7 +692,8 @@ def : Pat<(AArch64LOADgot tconstpool:$addr), // 32-bit jump table destination is actually only 2 instructions since we can // use the table itself as a PC-relative base. But optimization occurs after // branch relaxation so be pessimistic. -let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch" in { +let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch", + isNotDuplicable = 1 in { def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, Sched<[]>; diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 12e938c0f66ce..7e995b8938c24 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -284,15 +284,14 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } unsigned getJumpTableEntrySize(int Idx) const { - auto It = JumpTableEntryInfo.find(Idx); - if (It != JumpTableEntryInfo.end()) - return It->second.first; - return 4; + return JumpTableEntryInfo[Idx].first; } MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const { - return JumpTableEntryInfo.find(Idx)->second.second; + return JumpTableEntryInfo[Idx].second; } void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) { + if ((unsigned)Idx >= JumpTableEntryInfo.size()) + JumpTableEntryInfo.resize(Idx+1); JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym); } @@ -353,7 +352,7 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { MILOHContainer LOHContainerSet; SetOfInstructions LOHRelated; - DenseMap> JumpTableEntryInfo; + SmallVector, 2> JumpTableEntryInfo; }; namespace yaml { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 7307d5b7e1d0c..2a85edea65d57 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2994,6 +2994,8 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); + + MF->getInfo()->setJumpTableEntryInfo(JTI, 4, nullptr); auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg}, {JTAddr, Index}) .addJumpTableIndex(JTI); diff --git a/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir b/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir new file mode 100644 index 0000000000000..0e4148e0163f5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir @@ -0,0 +1,188 @@ +# RUN: llc -run-pass=tailduplication -tail-dup-size=4 %s -o - | FileCheck %s + +# JumpTableDest32 uses an `adr` to a temporary label (itself). If duplicated we +# cannot guarantee reachability for any uses after the first. + +# CHECK: JumpTableDest32 +# CHECK-NOT: JumpTableDest32 + + +--- | + ; ModuleID = 'jump-table.ll' + source_filename = "jump-table.ll" + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "arm64-apple-ios" + + define i32 @test_jumptable32(i32 %in, i1 %tst) { + br i1 %tst, label %true, label %false + + true: ; preds = %0 + call void @foo() + br label %switch + + false: ; preds = %0 + call void @bar() + br label %switch + + lbl1: ; preds = %lbl4, %lbl3, %def, %switch + %merge = phi i32 [ 1, %switch ], [ 0, %def ], [ 4, %lbl3 ], [ 8, %lbl4 ] + ret i32 %merge + + switch: ; preds = %false, %true + switch i32 %in, label %def [ + i32 0, label %lbl1 + i32 1, label %lbl2 + i32 2, label %lbl3 + i32 4, label %lbl4 + ] + + def: ; preds = %switch + br label %lbl1 + + lbl2: ; preds = %switch + %1 = call i64 @llvm.aarch64.space(i32 262144, i64 undef) + ret i32 2 + + lbl3: ; preds = %switch + br label %lbl1 + + lbl4: ; preds = %switch + br label %lbl1 + } + + declare void @foo() + + declare void @bar() + + ; Function Attrs: nounwind + declare i64 @llvm.aarch64.space(i32, i64) #0 + + attributes #0 = { nounwind } + +... +--- +name: test_jumptable32 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$w0', virtual-reg: '' } + - { reg: '$w1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 32 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x19', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x20', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +jumpTable: + kind: label-difference32 + entries: + - id: 0 + blocks: [ '%bb.9', '%bb.6', '%bb.7', '%bb.5', '%bb.8' ] +body: | + bb.0 (%ir-block.0): + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $w0, $w1, $x19, $x20, $lr + + early-clobber $sp = frame-setup STPXpre killed $x20, killed $x19, $sp, -4 :: (store 8 into %stack.3), (store 8 into %stack.2) + frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store 8 into %stack.1), (store 8 into %stack.0) + frame-setup CFI_INSTRUCTION def_cfa_offset 32 + frame-setup CFI_INSTRUCTION offset $w30, -8 + frame-setup CFI_INSTRUCTION offset $w29, -16 + frame-setup CFI_INSTRUCTION offset $w19, -24 + frame-setup CFI_INSTRUCTION offset $w20, -32 + renamable $w19 = COPY $w0 + TBZW killed renamable $w1, 0, %bb.2 + + bb.1.true: + successors: %bb.3(0x80000000) + liveins: $w19 + + BL @foo, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + B %bb.3 + + bb.2.false: + successors: %bb.3(0x80000000) + liveins: $w19 + + BL @bar, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + B %bb.3 + + bb.3.switch: + successors: %bb.9(0x1c71c71c), %bb.6(0x1c71c71c), %bb.7(0x1c71c71c), %bb.5(0x0e38e38e), %bb.8(0x1c71c71c) + liveins: $w19 + + renamable $w8 = ORRWrs $wzr, killed renamable $w19, 0, implicit-def $x8 + renamable $x9 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 + early-clobber renamable $x10, dead early-clobber renamable $x11 = JumpTableDest32 killed renamable $x9, killed renamable $x8, %jump-table.0 + BR killed renamable $x10 + + bb.5.def: + successors: %bb.9(0x80000000) + + renamable $w0 = COPY $wzr + B %bb.9 + + bb.6.lbl2: + successors: %bb.9(0x80000000) + + dead $xzr = SPACE 262144, undef renamable $x8 + $w0 = MOVi32imm 2 + B %bb.9 + + bb.7.lbl3: + successors: %bb.9(0x80000000) + + renamable $w0 = MOVi32imm 4 + B %bb.9 + + bb.8.lbl4: + successors: %bb.9(0x80000000) + + renamable $w0 = MOVi32imm 8 + + bb.9.lbl1: + liveins: $w0 + + $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0) + early-clobber $sp, $x20, $x19 = frame-destroy LDPXpost $sp, 4 :: (load 8 from %stack.3), (load 8 from %stack.2) + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/jump-table-exynos.ll b/llvm/test/CodeGen/AArch64/jump-table-exynos.ll index 8dca2de6e0afe..b5b400ecfbffc 100644 --- a/llvm/test/CodeGen/AArch64/jump-table-exynos.ll +++ b/llvm/test/CodeGen/AArch64/jump-table-exynos.ll @@ -11,7 +11,17 @@ define i32 @test_jumptable(i32 %in) { i32 4, label %lbl4 ] ; CHECK-LABEL: test_jumptable: -; CHECK-NOT: ldrb +; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0 +; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], {{#?}}:lo12:.LJTI0_0 +; CHECK: [[PCREL_LBL:.Ltmp.*]]: +; CHECK-NEXT: adr [[PCBASE:x[0-9]+]], [[PCREL_LBL]] +; CHECK: ldrsw x[[OFFSET:[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #2] +; CHECK: add [[DEST:x[0-9]+]], [[PCBASE]], x[[OFFSET]] +; CHECK: br [[DEST]] + + +; CHECK: .LJTI0_0: +; CHECK-NEXT: .word .LBB{{.*}}-[[PCREL_LBL]] def: ret i32 0 diff --git a/llvm/test/CodeGen/AArch64/jump-table.ll b/llvm/test/CodeGen/AArch64/jump-table.ll index 4e70e92beaf8d..cbd187e9a49f4 100644 --- a/llvm/test/CodeGen/AArch64/jump-table.ll +++ b/llvm/test/CodeGen/AArch64/jump-table.ll @@ -155,3 +155,56 @@ lbl4: ; CHECK-IOS-NEXT: .byte (LBB{{.*}}-[[JTBASE]])>>2 ; CHECK-IOS-NEXT: .byte (LBB{{.*}}-[[JTBASE]])>>2 ; CHECK-IOS-NOT: .end_data_region + +; Compressing just the first table has the opportunity to truncate the vector of +; sizes. Make sure it doesn't. +define i32 @test_twotables(i32 %in1, i32 %in2) { +; CHECK-LABEL: test_twotables: +; CHECK: .LJTI2_0 +; CHECK: .LJTI2_1 + + switch i32 %in1, label %def [ + i32 0, label %lbl1 + i32 1, label %lbl2 + i32 2, label %lbl3 + i32 4, label %lbl4 + ] + +def: + ret i32 0 + +lbl1: + ret i32 1 + +lbl2: + ret i32 2 + +lbl3: + ret i32 4 + +lbl4: + switch i32 %in1, label %def [ + i32 0, label %lbl5 + i32 1, label %lbl6 + i32 2, label %lbl7 + i32 4, label %lbl8 + ] + +lbl5: + call i64 @llvm.aarch64.space(i32 262144, i64 undef) + ret i32 1 + +lbl6: + call i64 @llvm.aarch64.space(i32 262144, i64 undef) + ret i32 2 + +lbl7: + call i64 @llvm.aarch64.space(i32 262144, i64 undef) + ret i32 4 +lbl8: + call i64 @llvm.aarch64.space(i32 262144, i64 undef) + ret i32 8 + +} + +declare i64 @llvm.aarch64.space(i32, i64) diff --git a/llvm/test/CodeGen/AArch64/win64-jumptable.ll b/llvm/test/CodeGen/AArch64/win64-jumptable.ll index 1983b2568cdee..1071a736cffd7 100644 --- a/llvm/test/CodeGen/AArch64/win64-jumptable.ll +++ b/llvm/test/CodeGen/AArch64/win64-jumptable.ll @@ -40,10 +40,10 @@ declare void @g(i32, i32) ; CHECK-NEXT: .seh_endfunclet ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LJTI0_0: -; CHECK: .word .LBB0_2-.LJTI0_0 -; CHECK: .word .LBB0_3-.LJTI0_0 -; CHECK: .word .LBB0_4-.LJTI0_0 -; CHECK: .word .LBB0_5-.LJTI0_0 +; CHECK: .word .LBB0_2-.Ltmp0 +; CHECK: .word .LBB0_3-.Ltmp0 +; CHECK: .word .LBB0_4-.Ltmp0 +; CHECK: .word .LBB0_5-.Ltmp0 ; CHECK: .seh_handlerdata ; CHECK: .text ; CHECK: .seh_endproc