[LoongArch] Insert nops and emit align reloc when handling alignment directive

Refer to RISCV: we fix up the alignment if linker relaxation changes code size and breaks alignment. Insert enough Nops and emit an R_LARCH_ALIGN relocation so that the linker can satisfy the alignment by removing Nops. This is done only in sections with the SHF_EXECINSTR flag. In LoongArch psABI v2.30, R_LARCH_ALIGN requires a symbol index. The lowest 8 bits of the addend represent the alignment and the other bits of the addend represent the maximum number of bytes to emit.
llvm · Jan 11, 2024 · 4985669 · 4985669
1 parent 7b45c54
commit 4985669
Show file tree

Hide file tree

Showing 6 changed files with 174 additions and 2 deletions.
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -16,8 +16,11 @@
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/EndianStream.h"
+#include "llvm/Support/MathExtras.h"
 
 #define DEBUG_TYPE "loongarch-asmbackend"
 
@@ -174,6 +177,72 @@ void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm,
   }
 }
 
+// Linker relaxation may change code size, so when relaxation is enabled we
+// pad an alignment directive with Nops that the linker can later remove to
+// restore the alignment. On a true return, Size holds the number of Nop
+// bytes to insert (alignment - 4); Size is not meaningful on a false return.
+bool LoongArchAsmBackend::shouldInsertExtraNopBytesForCodeAlign(
+    const MCAlignFragment &AF, unsigned &Size) {
+  // Extra Nops are only needed when linker relaxation is enabled.
+  const MCSubtargetInfo *STI = AF.getSubtargetInfo();
+  if (!STI->hasFeature(LoongArch::FeatureRelax))
+    return false;
+
+  // Ignore alignment if MaxBytesToEmit is less than the minimum Nop size.
+  const unsigned MinNopLen = 4;
+  if (AF.getMaxBytesToEmit() < MinNopLen)
+    return false;
+  Size = AF.getAlignment().value() - MinNopLen;
+  return AF.getAlignment() > MinNopLen;
+}
+
+// When linker relaxation is enabled, record an R_LARCH_ALIGN relocation that
+// marks where the alignment Nops were inserted so the linker can remove the
+// ones it does not need.
+// The function creates a fixup_loongarch_align fixup, which eventually
+// becomes an R_LARCH_ALIGN relocation, and returns true if one was recorded.
+// The improved R_LARCH_ALIGN requires a symbol index. The lowest 8 bits of
+// the addend encode the alignment and the remaining bits encode the maximum
+// number of bytes to emit. A maximum of zero means that there is no limit
+// on the number of bytes emitted.
+bool LoongArchAsmBackend::shouldInsertFixupForCodeAlign(
+    MCAssembler &Asm, const MCAsmLayout &Layout, MCAlignFragment &AF) {
+  // Insert the fixup only when linker relaxation is enabled.
+  const MCSubtargetInfo *STI = AF.getSubtargetInfo();
+  if (!STI->hasFeature(LoongArch::FeatureRelax))
+    return false;
+
+  // Calculate the total Nop bytes to insert. If there are none to insert,
+  // there is nothing to record, so simply return.
+  unsigned Count;
+  if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count))
+    return false;
+
+  MCSection *Sec = AF.getParent();
+  MCContext &Ctx = Asm.getContext();
+  const MCExpr *Dummy = MCConstantExpr::create(0, Ctx);
+  // Create the fixup_loongarch_align fixup at offset 0 with a dummy value.
+  MCFixup Fixup =
+      MCFixup::create(0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_align));
+  const MCSymbolRefExpr *MCSym = getSecToAlignSym()[Sec];
+  if (MCSym == nullptr) {
+    // Reuse one alignment symbol per section; create and cache it on first use.
+    MCSymbol *Sym = Ctx.createTempSymbol(".Lla-relax-align", false);
+    Sym->setFragment(&*Sec->getBeginSymbol()->getFragment());
+    Asm.registerSymbol(*Sym);
+    MCSym = MCSymbolRefExpr::create(Sym, Ctx);
+    getSecToAlignSym()[Sec] = MCSym;
+  }
+
+  uint64_t FixedValue = 0;
+  unsigned Lo = Log2_64(Count) + 1;
+  unsigned Hi = AF.getMaxBytesToEmit() >= Count ? 0 : AF.getMaxBytesToEmit();
+  MCValue Value = MCValue::get(MCSym, nullptr, Hi << 8 | Lo);
+  Asm.getWriter().recordRelocation(Asm, Layout, &AF, Fixup, Value, FixedValue);
+
+  return true;
+}
+
 bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
                                                 const MCFixup &Fixup,
                                                 const MCValue &Target,

diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -17,7 +17,9 @@
 #include "MCTargetDesc/LoongArchFixupKinds.h"
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 
 namespace llvm {
@@ -27,6 +29,7 @@ class LoongArchAsmBackend : public MCAsmBackend {
   uint8_t OSABI;
   bool Is64Bit;
   const MCTargetOptions &TargetOptions;
+  DenseMap<MCSection *, const MCSymbolRefExpr *> SecToAlignSym;
 
 public:
   LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
@@ -45,6 +48,15 @@ class LoongArchAsmBackend : public MCAsmBackend {
                   uint64_t Value, bool IsResolved,
                   const MCSubtargetInfo *STI) const override;
 
+  // Return Size with extra Nop Bytes for alignment directive in code section.
+  bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
+                                             unsigned &Size) override;
+
+  // Insert target specific fixup type for alignment directive in code section.
+  bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
+                                     const MCAsmLayout &Layout,
+                                     MCAlignFragment &AF) override;
+
   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                              const MCValue &Target,
                              const MCSubtargetInfo *STI) override;
@@ -75,6 +87,9 @@ class LoongArchAsmBackend : public MCAsmBackend {
   std::unique_ptr<MCObjectTargetWriter>
   createObjectTargetWriter() const override;
   const MCTargetOptions &getTargetOptions() const { return TargetOptions; }
+  DenseMap<MCSection *, const MCSymbolRefExpr *> &getSecToAlignSym() {
+    return SecToAlignSym;
+  }
 };
 } // end namespace llvm
 

diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
@@ -109,6 +109,8 @@ enum Fixups {
   fixup_loongarch_tls_gd_hi20,
   // Generate an R_LARCH_RELAX which indicates the linker may relax here.
   fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX,
+  // Generate an R_LARCH_ALIGN which indicates the linker may fixup align here.
+  fixup_loongarch_align = FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN,
   // 36-bit fixup corresponding to %call36(foo) for a pair instructions:
   // pcaddu18i+jirl.
   fixup_loongarch_call36 = FirstLiteralRelocationKind + ELF::R_LARCH_CALL36,

diff --git a/llvm/test/MC/LoongArch/Relocations/align-non-executable.s b/llvm/test/MC/LoongArch/Relocations/align-non-executable.s
@@ -0,0 +1,22 @@
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s \
+# RUN:     | llvm-readobj -r - | FileCheck --check-prefixes=CHECK,RELAX %s
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s \
+# RUN:     | llvm-readobj -r - | FileCheck %s
+
+.section ".dummy", "a"
+.L1:
+  la.pcrel $t0, sym
+.p2align 3
+.L2:
+.dword .L2 - .L1
+
+# CHECK:       Relocations [
+# CHECK-NEXT:    Section ({{.*}}) .rela.dummy {
+# CHECK-NEXT:      0x0 R_LARCH_PCALA_HI20 sym 0x0
+# RELAX-NEXT:      0x0 R_LARCH_RELAX - 0x0
+# CHECK-NEXT:      0x4 R_LARCH_PCALA_LO12 sym 0x0
+# RELAX-NEXT:      0x4 R_LARCH_RELAX - 0x0
+# RELAX-NEXT:      0x8 R_LARCH_ADD64 .L2 0x0
+# RELAX-NEXT:      0x8 R_LARCH_SUB64 .L1 0x0
+# CHECK-NEXT:    }
+# CHECK-NEXT:  ]
diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s
@@ -28,12 +28,23 @@
 
 # RELAX:       Relocations [
 # RELAX-NEXT:    Section ({{.*}}) .rela.text {
+# RELAX-NEXT:      0x4 R_LARCH_ALIGN {{.*}} 0x4
 # RELAX-NEXT:      0x10 R_LARCH_PCALA_HI20 .L1 0x0
 # RELAX-NEXT:      0x10 R_LARCH_RELAX - 0x0
 # RELAX-NEXT:      0x14 R_LARCH_PCALA_LO12 .L1 0x0
 # RELAX-NEXT:      0x14 R_LARCH_RELAX - 0x0
 # RELAX-NEXT:    }
 # RELAX-NEXT:    Section ({{.*}}) .rela.data {
+# RELAX-NEXT:      0x10 R_LARCH_ADD8 .L3 0x0
+# RELAX-NEXT:      0x10 R_LARCH_SUB8 .L2 0x0
+# RELAX-NEXT:      0x11 R_LARCH_ADD16 .L3 0x0
+# RELAX-NEXT:      0x11 R_LARCH_SUB16 .L2 0x0
+# RELAX-NEXT:      0x13 R_LARCH_ADD32 .L3 0x0
+# RELAX-NEXT:      0x13 R_LARCH_SUB32 .L2 0x0
+# RELAX-NEXT:      0x17 R_LARCH_ADD64 .L3 0x0
+# RELAX-NEXT:      0x17 R_LARCH_SUB64 .L2 0x0
+# RELAX-NEXT:      0x1F R_LARCH_ADD_ULEB128 .L3 0x0
+# RELAX-NEXT:      0x1F R_LARCH_SUB_ULEB128 .L2 0x0
 # RELAX-NEXT:      0x20 R_LARCH_ADD8 .L4 0x0
 # RELAX-NEXT:      0x20 R_LARCH_SUB8 .L3 0x0
 # RELAX-NEXT:      0x21 R_LARCH_ADD16 .L4 0x0
@@ -57,7 +68,7 @@
 
 # RELAX:      Hex dump of section '.data':
 # RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000004
-# RELAX-NEXT: 0x00000010 0c0c000c 0000000c 00000000 0000000c
+# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000000
 # RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00000000
 # RELAX-NEXT: 0x00000030 00000000 00000000 00000000 000000
 
@@ -78,7 +89,7 @@
 .word  .L2 - .L1
 .dword .L2 - .L1
 .uleb128 .L2 - .L1
-## TODO Handle alignment directive.
+## With relaxation, emit relocs because the .align makes the diff variable.
 .byte  .L3 - .L2
 .short .L3 - .L2
 .word  .L3 - .L2

diff --git a/llvm/test/MC/LoongArch/Relocations/relax-align.s b/llvm/test/MC/LoongArch/Relocations/relax-align.s
@@ -0,0 +1,53 @@
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s \
+# RUN:     | llvm-readelf -rs - | FileCheck %s --check-prefix=NORELAX
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s \
+# RUN:     | llvm-readelf -rs - | FileCheck %s --check-prefix=RELAX
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s \
+# RUN:     | llvm-objdump -d - | FileCheck -check-prefix=RELAX-INST %s
+
+# NORELAX: There are no relocations in this file.
+# NORELAX: Symbol table '.symtab' contains 1 entries:
+
+# RELAX:       0000000000000000  0000000100000066 R_LARCH_ALIGN          0000000000000000 {{.*}} + 4
+# RELAX-NEXT:  0000000000000010  0000000100000066 R_LARCH_ALIGN          0000000000000000 {{.*}} + 5
+# RELAX-NEXT:  000000000000002c  0000000100000066 R_LARCH_ALIGN          0000000000000000 {{.*}} + 4
+# RELAX-NEXT:  000000000000003c  0000000100000066 R_LARCH_ALIGN          0000000000000000 {{.*}} + b04
+# RELAX-NEXT:  0000000000000048  0000000100000066 R_LARCH_ALIGN          0000000000000000 {{.*}} + 4
+# RELAX-EMPTY:
+# RELAX:       0000000000000000  0000000200000066 R_LARCH_ALIGN          0000000000000000 <null> + 4
+# RELAX-EMPTY:
+# RELAX:       Symbol table '.symtab' contains 3 entries:
+# RELAX:       0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT   UND
+# RELAX-NEXT:  1: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT     2
+# RELAX-NEXT:  2: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT     4
+
+.text
+.p2align 4        # A = 0x0
+nop
+.p2align 5        # B = A + 3 * NOP + NOP = 0x10
+.p2align 4        # C = B + 7 * NOP = 0x2C
+nop
+.p2align 4, , 11  # D = C + 3 * NOP + NOP = 0x3C
+## Do not emit the third parameter (max bytes to emit) in the addend.
+.p2align 4, , 12  # E = D + 3 * NOP = 0x48
+                  # END = E + 3 * NOP = 0x54 = 21 * NOP
+
+## Do not emit R_LARCH_ALIGN if the code alignment is greater than the alignment directive.
+.p2align 2
+.p2align 1
+.p2align 0
+## Do not emit instructions if the max emit bytes is less than the min nop size.
+.p2align 4, , 2
+## Do not emit R_LARCH_ALIGN if the alignment directive has a specific padding value.
+.p2align 4, 1
+nop
+.p2align 4, 1, 12
+
+# RELAX-INST:           <.text>:
+# RELAX-INST-COUNT-21:    nop
+# RELAX-INST-COUNT-3:     01 01 01 01
+# RELAX-INST-NEXT:        nop
+# RELAX-INST-COUNT-3:     01 01 01 01
+
+.section .text2, "ax"
+.p2align 4