Patch summary: opt-in GlobalMerge pass for the RISC-V backend.

RISCVTargetMachine.cpp
  * Adds the hidden tri-state option -riscv-enable-global-merge
    (cl::opt<cl::boolOrDefault> EnableGlobalMerge).
  * RISCVPassConfig::addPreISel() schedules createGlobalMergePass only when
    the option is explicitly true (cl::BOU_TRUE); with the option unset or
    false the pipeline is unchanged.
  * The pass is created with MaxOffset 2047, OnlyOptimizeForSize false and
    MergeExternalByDefault true. 2047 is the largest positive value of a
    12-bit signed immediate, the offset field width of RISC-V loads/stores.

test/CodeGen/RISCV/global-merge.ll
  * Two internal and two dso_local i32 globals are all stored through a single
    .L_MergedGlobals base on both riscv32 and riscv64.

test/CodeGen/RISCV/global-merge-offset.ll
  * sed substitutes ArrSize before llc runs. With ArrSize=100 all three
    globals are addressed via .L_MergedGlobals (largest offset 2044). With
    ArrSize=101 (CHECK-TOOBIG prefix) ga1 is addressed on its own and only
    the remaining globals go through .L_MergedGlobals.

diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index fcd525d7dd5e69..c25ddaef3213ab 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -42,6 +42,11 @@ static cl::opt<bool> EnableRedundantCopyElimination(
     cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
     cl::Hidden);
 
+// FIXME: Unify control over GlobalMerge.
+static cl::opt<cl::boolOrDefault>
+    EnableGlobalMerge("riscv-enable-global-merge", cl::Hidden,
+                      cl::desc("Enable the global merge pass"));
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -205,6 +210,13 @@ bool RISCVPassConfig::addPreISel() {
     // more details.
     addPass(createBarrierNoopPass());
   }
+
+  if (EnableGlobalMerge == cl::BOU_TRUE) {
+    addPass(createGlobalMergePass(TM, /* MaxOffset */ 2047,
+                                  /* OnlyOptimizeForSize */ false,
+                                  /* MergeExternalByDefault */ true));
+  }
+
   return false;
 }
 
diff --git a/llvm/test/CodeGen/RISCV/global-merge-offset.ll b/llvm/test/CodeGen/RISCV/global-merge-offset.ll
new file mode 100644
index 00000000000000..13afcba181719e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/global-merge-offset.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/ArrSize/100/g' %s | llc -mtriple=riscv32 -riscv-enable-global-merge \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/ArrSize/100/g' %s | llc -mtriple=riscv64 -riscv-enable-global-merge \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/ArrSize/101/g' %s | llc -mtriple=riscv32 -riscv-enable-global-merge \
+; RUN:   -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG
+; RUN: sed 's/ArrSize/101/g' %s | llc -mtriple=riscv64 -riscv-enable-global-merge \
+; RUN:   -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG
+
+; This test demonstrates that the MaxOffset is set correctly for RISC-V by
+; constructing an input that is at the limit and comparing.
+
+@ga1 = dso_local global [410 x i32] zeroinitializer, align 4
+@ga2 = dso_local global [ArrSize x i32] zeroinitializer, align 4
+@gi = dso_local global i32 0, align 4
+
+; TODO: It would be better for codesize if the final store below was
+; `sw a0, 0(a2)`.
+
+define void @f1(i32 %a) nounwind {
+; CHECK-LABEL: f1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.L_MergedGlobals)
+; CHECK-NEXT:    addi a2, a1, %lo(.L_MergedGlobals)
+; CHECK-NEXT:    sw a0, 2044(a2)
+; CHECK-NEXT:    sw a0, 404(a2)
+; CHECK-NEXT:    sw a0, %lo(.L_MergedGlobals)(a1)
+; CHECK-NEXT:    ret
+;
+; CHECK-TOOBIG-LABEL: f1:
+; CHECK-TOOBIG:       # %bb.0:
+; CHECK-TOOBIG-NEXT:    lui a1, %hi(ga1+1640)
+; CHECK-TOOBIG-NEXT:    lui a2, %hi(.L_MergedGlobals)
+; CHECK-TOOBIG-NEXT:    addi a3, a2, %lo(.L_MergedGlobals)
+; CHECK-TOOBIG-NEXT:    sw a0, %lo(ga1+1640)(a1)
+; CHECK-TOOBIG-NEXT:    sw a0, 408(a3)
+; CHECK-TOOBIG-NEXT:    sw a0, %lo(.L_MergedGlobals)(a2)
+; CHECK-TOOBIG-NEXT:    ret
+  %ga1_end = getelementptr inbounds [410 x i32], ptr @ga1, i32 0, i64 410
+  %ga2_end = getelementptr inbounds [ArrSize x i32], ptr @ga2, i32 0, i64 ArrSize
+  store i32 %a, ptr %ga1_end, align 4
+  store i32 %a, ptr %ga2_end, align 4
+  store i32 %a, ptr @gi, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/global-merge.ll b/llvm/test/CodeGen/RISCV/global-merge.ll
new file mode 100644
index 00000000000000..20379ee2e7dacd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/global-merge.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -riscv-enable-global-merge -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+; RUN: llc -mtriple=riscv64 -riscv-enable-global-merge -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+
+@ig1 = internal global i32 0, align 4
+@ig2 = internal global i32 0, align 4
+
+@eg1 = dso_local global i32 0, align 4
+@eg2 = dso_local global i32 0, align 4
+
+; TODO: It would be better for code size to alter the first store below by
+; first fully materialising .L_MergedGlobals in a1 and then storing to it with
+; a 0 offset.
+
+define void @f1(i32 %a) nounwind {
+; CHECK-LABEL: f1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.L_MergedGlobals)
+; CHECK-NEXT:    sw a0, %lo(.L_MergedGlobals)(a1)
+; CHECK-NEXT:    addi a1, a1, %lo(.L_MergedGlobals)
+; CHECK-NEXT:    sw a0, 4(a1)
+; CHECK-NEXT:    sw a0, 8(a1)
+; CHECK-NEXT:    sw a0, 12(a1)
+; CHECK-NEXT:    ret
+  store i32 %a, ptr @ig1, align 4
+  store i32 %a, ptr @ig2, align 4
+  store i32 %a, ptr @eg1, align 4
+  store i32 %a, ptr @eg2, align 4
+  ret void
+}