-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PowerPC] Implement fence builtin #76495
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-clang Author: Qiu Chaofan (ecnelises) Changes: This builtin will work as a barrier for instruction motion (scheduling, etc.). Full diff: https://github.com/llvm/llvm-project/pull/76495.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index a35488ed3dfa56..829c60defe17c6 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -944,6 +944,9 @@ TARGET_BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "", "vsx")
// Set the floating point rounding mode
BUILTIN(__builtin_setrnd, "di", "")
+// Barrier for instruction motion
+BUILTIN(__builtin_ppc_fence, "v", "")
+
// Get content from current FPSCR
BUILTIN(__builtin_readflm, "d", "")
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 045c273f03c7a0..41935abfb65d3b 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -212,6 +212,7 @@ static void defineXLCompatMacros(MacroBuilder &Builder) {
Builder.defineMacro("__darn_32", "__builtin_darn_32");
Builder.defineMacro("__darn_raw", "__builtin_darn_raw");
Builder.defineMacro("__dcbf", "__builtin_dcbf");
+ Builder.defineMacro("__fence", "__builtin_ppc_fence");
Builder.defineMacro("__fmadd", "__builtin_fma");
Builder.defineMacro("__fmadds", "__builtin_fmaf");
Builder.defineMacro("__abs", "__builtin_abs");
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c
index 9187bb855dac22..a5cc97161c56ac 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c
@@ -194,6 +194,18 @@ void test_dcbz() {
__dcbz(c);
}
+// CHECK-LABEL: @test_fence(
+// CHECK: call void @llvm.ppc.fence()
+// CHECK-NEXT: ret void
+//
+// CHECK-32-LABEL: @test_fence(
+// CHECK-32: call void @llvm.ppc.fence()
+// CHECK-32-NEXT: ret void
+//
+void test_fence() {
+ __fence();
+}
+
// CHECK-LABEL: @test_builtin_ppc_popcntb(
// CHECK: [[TMP0:%.*]] = load i64, ptr @a, align 8
// CHECK-NEXT: [[POPCNTB:%.*]] = call i64 @llvm.ppc.popcntb.i64.i64(i64 [[TMP0]])
@@ -375,3 +387,15 @@ void test_builtin_ppc_dcbtst() {
void test_builtin_ppc_dcbz() {
__builtin_ppc_dcbz(c);
}
+
+// CHECK-LABEL: @test_builtin_ppc_fence(
+// CHECK: call void @llvm.ppc.fence()
+// CHECK-NEXT: ret void
+//
+// CHECK-32-LABEL: @test_builtin_ppc_fence(
+// CHECK-32: call void @llvm.ppc.fence()
+// CHECK-32-NEXT: ret void
+//
+void test_builtin_ppc_fence() {
+ __builtin_ppc_fence();
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 3ede2a3736bf30..6d1e8eb47405dd 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -29,6 +29,11 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
[IntrArgMemOnly, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
+ // Emit pseudo instruction as fence of instruction motion
+ def int_ppc_fence : ClangBuiltin<"__builtin_ppc_fence">,
+ DefaultAttrsIntrinsic<[], [],
+ [IntrNoMerge, IntrHasSideEffects]>;
+
// Get content from current FPSCR register
def int_ppc_readflm : ClangBuiltin<"__builtin_readflm">,
DefaultAttrsIntrinsic<[llvm_double_ty], [],
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index aaced58defe603..af55c6cf337120 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2155,11 +2155,16 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const {
+ switch (MI.getOpcode()) {
+ default: break;
// Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
// across them, since some FP operations may change content of FPSCR.
// TODO: Model FPSCR in PPC instruction definitions and remove the workaround
- if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF)
+ case PPC::MFFS:
+ case PPC::MTFSF:
+ case PPC::FENCE:
return true;
+ }
return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index b1601739fd4569..c0344dfbf3a728 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1328,6 +1328,9 @@ def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM),
"#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>;
}
+let isBarrier = 1, hasSideEffects = 1, Defs = [RM] in
+def FENCE : PPCEmitTimePseudo<(outs), (ins), "#FENCE", []>;
+
let Defs = [LR] in
def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
@@ -3187,6 +3190,7 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
(TCRETURNri CTRRC:$dst, imm:$imm)>;
+def : Pat<(int_ppc_fence), (FENCE)>;
def : Pat<(int_ppc_readflm), (MFFS)>;
def : Pat<(int_ppc_mffsl), (MFFSL)>;
diff --git a/llvm/test/CodeGen/PowerPC/fence.ll b/llvm/test/CodeGen/PowerPC/fence.ll
new file mode 100644
index 00000000000000..da14e8be0d4288
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fence.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
+; RUN: -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \
+; RUN: -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc < %s -mtriple powerpc64le-unknown-linux -debug-only=machine-scheduler \
+; RUN: 2>&1 | FileCheck %s --check-prefix=LOG
+
+define dso_local void @test_builtin_ppc_fence() {
+; CHECK-LABEL: test_builtin_ppc_fence:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #FENCE
+; CHECK-NEXT: blr
+entry:
+ call void @llvm.ppc.fence()
+ ret void
+}
+declare void @llvm.ppc.fence()
+
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+;
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+define double @motion(double %a, double %b, double %c, double %d) {
+entry:
+ %0 = fdiv double %a, %b
+ %1 = fdiv double %b, %d
+ call void @llvm.ppc.fence()
+ %2 = fdiv double %c, %d
+ %3 = fdiv double %a, %c
+ call void @llvm.ppc.fence()
+ %4 = fadd double %0, %1
+ %5 = fadd double %2, %3
+ %6 = fsub double %4, %5
+ ret double %6
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with nits.
This builtin will work as a barrier for instruction motion (scheduling, etc.).