From ae4d39c9e4ad391b817a798aa4b5fecfbe9c6cf4 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Thu, 20 Feb 2020 17:42:51 -0800 Subject: [PATCH] [SystemZ] Copy Access registers and CC with the correct register class. On SystemZ there are a set of "access registers" that can be copied in and out of 32-bit GPRs with special instructions. These instructions can only perform the copy using low 32-bit parts of the 64-bit GPRs. However, the default register class for 32-bit integers is GRX32, which also contains the high 32-bit part registers. In order to never end up with a case of such a COPY into a high reg, this patch adds a new simple pre-RA pass that selects such COPYs into target instructions. This pass also handles COPYs from CC (Condition Code register), and COPYs to CC can now also be emitted from a high reg in copyPhysReg(). Fixes: https://bugs.llvm.org/show_bug.cgi?id=44254 Review: Ulrich Weigand. Differential Revision: https://reviews.llvm.org/D75014 --- llvm/lib/Target/SystemZ/CMakeLists.txt | 1 + llvm/lib/Target/SystemZ/SystemZ.h | 1 + .../Target/SystemZ/SystemZCopyPhysRegs.cpp | 120 ++++++++++++++++++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 21 +-- .../Target/SystemZ/SystemZTargetMachine.cpp | 5 + llvm/test/CodeGen/SystemZ/tls-08.ll | 24 ++++ llvm/test/CodeGen/SystemZ/tls-09.ll | 37 ++++++ llvm/test/CodeGen/SystemZ/tls-10.mir | 24 ++++ llvm/test/CodeGen/SystemZ/tls-11.mir | 18 +++ 9 files changed, 234 insertions(+), 17 deletions(-) create mode 100644 llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp create mode 100644 llvm/test/CodeGen/SystemZ/tls-08.ll create mode 100644 llvm/test/CodeGen/SystemZ/tls-09.ll create mode 100644 llvm/test/CodeGen/SystemZ/tls-10.mir create mode 100644 llvm/test/CodeGen/SystemZ/tls-11.mir diff --git a/llvm/lib/Target/SystemZ/CMakeLists.txt b/llvm/lib/Target/SystemZ/CMakeLists.txt index 03e5e31831f65..6922b1ca2c36a 100644 --- a/llvm/lib/Target/SystemZ/CMakeLists.txt +++ b/llvm/lib/Target/SystemZ/CMakeLists.txt @@ -16,6 
+16,7 @@ add_llvm_target(SystemZCodeGen SystemZAsmPrinter.cpp SystemZCallingConv.cpp SystemZConstantPoolValue.cpp + SystemZCopyPhysRegs.cpp SystemZElimCompare.cpp SystemZFrameLowering.cpp SystemZHazardRecognizer.cpp diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h index 0808160f627cb..bedbd061ea5c1 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.h +++ b/llvm/lib/Target/SystemZ/SystemZ.h @@ -193,6 +193,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM); FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM); FunctionPass *createSystemZTDCPass(); } // end namespace llvm diff --git a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp new file mode 100644 index 0000000000000..7d21d29d270e3 --- /dev/null +++ b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp @@ -0,0 +1,120 @@ +//===---------- SystemZCopyPhysRegs.cpp - Handle phys reg copies ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass makes sure that a COPY of a physical register will be +// implementable after register allocation in copyPhysReg() (this could be +// done in EmitInstrWithCustomInserter() instead if COPY instructions would +// be passed to it).
+// +//===----------------------------------------------------------------------===// + +#include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define SYSTEMZ_COPYPHYSREGS_NAME "SystemZ Copy Physregs" + +namespace llvm { + void initializeSystemZCopyPhysRegsPass(PassRegistry&); +} + +namespace { + +class SystemZCopyPhysRegs : public MachineFunctionPass { +public: + static char ID; + SystemZCopyPhysRegs() + : MachineFunctionPass(ID), TII(nullptr), MRI(nullptr) { + initializeSystemZCopyPhysRegsPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return SYSTEMZ_COPYPHYSREGS_NAME; } + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + + bool visitMBB(MachineBasicBlock &MBB); + + const SystemZInstrInfo *TII; + MachineRegisterInfo *MRI; +}; + +char SystemZCopyPhysRegs::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(SystemZCopyPhysRegs, "systemz-copy-physregs", + SYSTEMZ_COPYPHYSREGS_NAME, false, false) + +FunctionPass *llvm::createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM) { + return new SystemZCopyPhysRegs(); +} + +void SystemZCopyPhysRegs::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool SystemZCopyPhysRegs::visitMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + // Certain special registers can only be copied from a subset of the + // default register class of the type. It is therefore necessary to create + // the target copy instructions before regalloc instead of in copyPhysReg(). 
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; ) { + MachineInstr *MI = &*MBBI++; + if (!MI->isCopy()) + continue; + + DebugLoc DL = MI->getDebugLoc(); + Register SrcReg = MI->getOperand(1).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + if (DstReg.isVirtual() && + (SrcReg == SystemZ::CC || SystemZ::AR32BitRegClass.contains(SrcReg))) { + Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass); + if (SrcReg == SystemZ::CC) + BuildMI(MBB, MI, DL, TII->get(SystemZ::IPM), Tmp); + else + BuildMI(MBB, MI, DL, TII->get(SystemZ::EAR), Tmp).addReg(SrcReg); + MI->getOperand(1).setReg(Tmp); + Modified = true; + } + else if (SrcReg.isVirtual() && + SystemZ::AR32BitRegClass.contains(DstReg)) { + Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass); + MI->getOperand(0).setReg(Tmp); + BuildMI(MBB, MBBI, DL, TII->get(SystemZ::SAR), DstReg).addReg(Tmp); + Modified = true; + } + } + + return Modified; +} + +bool SystemZCopyPhysRegs::runOnMachineFunction(MachineFunction &F) { + TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); + MRI = &F.getRegInfo(); + + bool Modified = false; + for (auto &MBB : F) + Modified |= visitMBB(MBB); + + return Modified; +} + diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 8b30196474df6..7161afe6f5e50 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -820,18 +820,11 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - // Move CC value from/to a GR32. - if (SrcReg == SystemZ::CC) { - auto MIB = BuildMI(MBB, MBBI, DL, get(SystemZ::IPM), DestReg); - if (KillSrc) { - const MachineFunction *MF = MBB.getParent(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - MIB->addRegisterKilled(SrcReg, TRI); - } - return; - } + // Move CC value from a GR32.
if (DestReg == SystemZ::CC) { - BuildMI(MBB, MBBI, DL, get(SystemZ::TMLH)) + unsigned Opcode = + SystemZ::GR32BitRegClass.contains(SrcReg) ? SystemZ::TMLH : SystemZ::TMHH; + BuildMI(MBB, MBBI, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(3 << (SystemZ::IPM_CC - 16)); return; @@ -856,12 +849,6 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opcode = SystemZ::VLR; else if (SystemZ::AR32BitRegClass.contains(DestReg, SrcReg)) Opcode = SystemZ::CPYA; - else if (SystemZ::AR32BitRegClass.contains(DestReg) && - SystemZ::GR32BitRegClass.contains(SrcReg)) - Opcode = SystemZ::SAR; - else if (SystemZ::GR32BitRegClass.contains(DestReg) && - SystemZ::AR32BitRegClass.contains(SrcReg)) - Opcode = SystemZ::EAR; else llvm_unreachable("Impossible reg-to-reg copy"); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index 9fc5616766568..3f467b200852d 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -222,6 +222,7 @@ class SystemZPassConfig : public TargetPassConfig { void addIRPasses() override; bool addInstSelector() override; bool addILPOpts() override; + void addPreRegAlloc() override; void addPostRewrite() override; void addPostRegAlloc() override; void addPreSched2() override; @@ -253,6 +254,10 @@ bool SystemZPassConfig::addILPOpts() { return true; } +void SystemZPassConfig::addPreRegAlloc() { + addPass(createSystemZCopyPhysRegsPass(getSystemZTargetMachine())); +} + void SystemZPassConfig::addPostRewrite() { addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); } diff --git a/llvm/test/CodeGen/SystemZ/tls-08.ll b/llvm/test/CodeGen/SystemZ/tls-08.ll new file mode 100644 index 0000000000000..57dd552740f20 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/tls-08.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O0 \ +; RUN: -stop-before=regallocfast 2>&1 | FileCheck %s +; RUN: llc < %s 
-mcpu=z196 -mtriple=s390x-linux-gnu -O3 \ +; RUN: -stop-before=livevars 2>&1 | FileCheck %s +; +; Test that copies to/from access registers are handled before regalloc with +; GR32 regs. + +@x = dso_local thread_local global i32 0, align 4 +define weak_odr hidden i32* @fun0() { +; CHECK: name: fun0 +; CHECK: {{%[0-9]+}}:gr32bit = EAR $a0 +; CHECK: {{%[0-9]+}}:gr32bit = EAR $a1 + ret i32* @x +} + +define i32 @fun1() { +; CHECK: name: fun1 +; CHECK: [[VREG0:%[0-9]+]]:gr32bit = COPY %0 +; CHECK-NEXT: $a1 = SAR [[VREG0]] +; CHECK: {{%[0-9]+}}:gr32bit = EAR $a0 + %val = call i32 asm "blah", "={a0}, {a1}" (i32 0) + ret i32 %val +} diff --git a/llvm/test/CodeGen/SystemZ/tls-09.ll b/llvm/test/CodeGen/SystemZ/tls-09.ll new file mode 100644 index 0000000000000..4512206c275c9 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/tls-09.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O0 +; +; Test that a0 and a1 are copied successfully into GR32 registers. + +@x = dso_local thread_local global i32 0, align 4 +define i32 @fun0(i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext) { + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + %10 = alloca i32, align 4 + %11 = alloca i32, align 4 + %12 = alloca i32, align 4 + %13 = alloca i32, align 4 + %14 = alloca i32, align 4 + %15 = load i32, i32* @x, align 4 + store i32 %0, i32* %8, align 4 + store i32 %1, i32* %9, align 4 + store i32 %2, i32* %10, align 4 + store i32 %3, i32* %11, align 4 + store i32 %4, i32* %12, align 4 + store i32 %5, i32* %13, align 4 + store i32 %6, i32* %14, align 4 + %16 = load i32, i32* %8, align 4 + %17 = add nsw i32 %15, %16 + %18 = load i32, i32* %9, align 4 + %19 = add nsw i32 %17, %18 + %20 = load i32, i32* %10, align 4 + %21 = add nsw i32 %19, %20 + %22 = load i32, i32* %11, align 4 + %23 = add nsw i32 %21, %22 + %24 = load i32, i32* %12, align 4 + %25 = add nsw i32 %23, %24 + %26 = load i32, i32* %13, align 4 + %27 = add nsw i32 %25, %26 + %28 
= load i32, i32* %14, align 4 + %29 = add nsw i32 %27, %28 + ret i32 %29 +} diff --git a/llvm/test/CodeGen/SystemZ/tls-10.mir b/llvm/test/CodeGen/SystemZ/tls-10.mir new file mode 100644 index 0000000000000..33094aff058de --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/tls-10.mir @@ -0,0 +1,24 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z196 -O0 -start-after=finalize-isel \ +# RUN: -stop-before=regallocfast -o - %s | FileCheck %s +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z196 -O3 -start-after=finalize-isel \ +# RUN: -stop-before=livevars -o - %s | FileCheck %s +# +# Test that a COPY from CC gets implemented with an IPM to a GR32 reg. + +--- +name: fun0 +tracksRegLiveness: true +registers: + - { id: 0, class: grx32bit } +body: | + bb.0: + liveins: $cc + ; CHECK-LABEL: name: fun0 + ; CHECK: %1:gr32bit = IPM implicit $cc + ; CHECK-NEXT: %0:grx32bit = COPY %1 + ; CHECK-NEXT: $r2l = COPY %0 + ; CHECK-NEXT: Return implicit $r2l + %0:grx32bit = COPY $cc + $r2l = COPY %0 + Return implicit $r2l +... diff --git a/llvm/test/CodeGen/SystemZ/tls-11.mir b/llvm/test/CodeGen/SystemZ/tls-11.mir new file mode 100644 index 0000000000000..623a8ab519f73 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/tls-11.mir @@ -0,0 +1,18 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z196 -O0 -start-before=prologepilog \ +# RUN: -o - %s | FileCheck %s +# +# Test that a COPY to CC gets implemented with a tmlh or tmhh depending on +# the source register. + +--- +name: fun0 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r3l, $r4h + ; CHECK-LABEL: fun0 + ; CHECK: tmlh %r3, 12288 + ; CHECK: tmhh %r4, 12288 + $cc = COPY $r3l + $cc = COPY $r4h +...