Add a PowerPC instruction-motion fence builtin, __builtin_ppc_fence.

Summary of the hunks below:
  * clang: declare __builtin_ppc_fence (signature "v") and map the XL-compat
    macro __fence to it; the CodeGen tests expect a call to
    @llvm.ppc.fence() for both spellings.
  * llvm: define the llvm.ppc.fence intrinsic (IntrNoMerge,
    IntrHasSideEffects), select it to a new FENCE emit-time pseudo
    (isBarrier, hasSideEffects, Defs = [RM]), and report FENCE as a
    scheduling boundary in PPCInstrInfo::isSchedulingBoundary alongside
    MFFS and MTFSF.
  * llvm test fence.ll: checks that "#FENCE" is printed, and that the
    machine-scheduler debug log for @motion reports FENCE as the ExitSU.

NOTE(review): this patch text was recovered from a copy whose line breaks and
indentation had been collapsed. Line structure was rebuilt from the hunk
headers; leading whitespace on context lines is a best-effort reconstruction,
so use `git apply --ignore-whitespace` if a context line fails to match.
NOTE(review): the IntrinsicsPowerPC.td context line had its template
arguments stripped ("NoCapture>, ImmArg>"); it is restored below as
NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>> -- confirm against the tree.

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index a35488ed3dfa5..88ae0ce940852 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -110,6 +110,10 @@ BUILTIN(__builtin_ppc_fctiw, "dd", "")
 BUILTIN(__builtin_ppc_fctiwz, "dd", "")
 BUILTIN(__builtin_ppc_fctudz, "dd", "")
 BUILTIN(__builtin_ppc_fctuwz, "dd", "")
+
+// fence builtin prevents all instructions moved across it
+BUILTIN(__builtin_ppc_fence, "v", "")
+
 BUILTIN(__builtin_ppc_swdiv_nochk, "ddd", "")
 BUILTIN(__builtin_ppc_swdivs_nochk, "fff", "")
 BUILTIN(__builtin_ppc_alignx, "vIivC*", "nc")
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 045c273f03c7a..41935abfb65d3 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -212,6 +212,7 @@ static void defineXLCompatMacros(MacroBuilder &Builder) {
   Builder.defineMacro("__darn_32", "__builtin_darn_32");
   Builder.defineMacro("__darn_raw", "__builtin_darn_raw");
   Builder.defineMacro("__dcbf", "__builtin_dcbf");
+  Builder.defineMacro("__fence", "__builtin_ppc_fence");
   Builder.defineMacro("__fmadd", "__builtin_fma");
   Builder.defineMacro("__fmadds", "__builtin_fmaf");
   Builder.defineMacro("__abs", "__builtin_abs");
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c
index 9187bb855dac2..a5cc97161c56a 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c
@@ -194,6 +194,18 @@ void test_dcbz() {
   __dcbz(c);
 }
 
+// CHECK-LABEL: @test_fence(
+// CHECK: call void @llvm.ppc.fence()
+// CHECK-NEXT: ret void
+//
+// CHECK-32-LABEL: @test_fence(
+// CHECK-32: call void @llvm.ppc.fence()
+// CHECK-32-NEXT: ret void
+//
+void test_fence() {
+  __fence();
+}
+
 // CHECK-LABEL: @test_builtin_ppc_popcntb(
 // CHECK: [[TMP0:%.*]] = load i64, ptr @a, align 8
 // CHECK-NEXT: [[POPCNTB:%.*]] = call i64 @llvm.ppc.popcntb.i64.i64(i64 [[TMP0]])
@@ -375,3 +387,15 @@ void test_builtin_ppc_dcbtst() {
 void test_builtin_ppc_dcbz() {
   __builtin_ppc_dcbz(c);
 }
+
+// CHECK-LABEL: @test_builtin_ppc_fence(
+// CHECK: call void @llvm.ppc.fence()
+// CHECK-NEXT: ret void
+//
+// CHECK-32-LABEL: @test_builtin_ppc_fence(
+// CHECK-32: call void @llvm.ppc.fence()
+// CHECK-32-NEXT: ret void
+//
+void test_builtin_ppc_fence() {
+  __builtin_ppc_fence();
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 3ede2a3736bf3..6d1e8eb47405d 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -29,6 +29,11 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
     [IntrArgMemOnly, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
   def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
 
+  // Emit pseudo instruction as fence of instruction motion
+  def int_ppc_fence : ClangBuiltin<"__builtin_ppc_fence">,
+                      DefaultAttrsIntrinsic<[], [],
+                                            [IntrNoMerge, IntrHasSideEffects]>;
+
   // Get content from current FPSCR register
   def int_ppc_readflm : ClangBuiltin<"__builtin_readflm">,
                         DefaultAttrsIntrinsic<[llvm_double_ty], [],
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index aaced58defe60..538e0e6b3d420 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2155,11 +2155,17 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
 bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                         const MachineBasicBlock *MBB,
                                         const MachineFunction &MF) const {
+  switch (MI.getOpcode()) {
+  default:
+    break;
   // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
   // across them, since some FP operations may change content of FPSCR.
   // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
-  if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF)
+  case PPC::MFFS:
+  case PPC::MTFSF:
+  case PPC::FENCE:
     return true;
+  }
   return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index bf756e39bd5d0..5550ba4207392 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1328,6 +1328,9 @@ def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM),
                     "#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>;
 }
 
+let isBarrier = 1, hasSideEffects = 1, Defs = [RM] in
+def FENCE : PPCEmitTimePseudo<(outs), (ins), "#FENCE", []>;
+
 let Defs = [LR] in
   def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
                    PPC970_Unit_BRU;
@@ -3187,6 +3190,7 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
 def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
           (TCRETURNri CTRRC:$dst, imm:$imm)>;
 
+def : Pat<(int_ppc_fence), (FENCE)>;
 def : Pat<(int_ppc_readflm), (MFFS)>;
 def : Pat<(int_ppc_mffsl), (MFFSL)>;
 
diff --git a/llvm/test/CodeGen/PowerPC/fence.ll b/llvm/test/CodeGen/PowerPC/fence.ll
new file mode 100644
index 0000000000000..da14e8be0d428
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fence.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
+; RUN: -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \
+; RUN: -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc < %s -mtriple powerpc64le-unknown-linux -debug-only=machine-scheduler \
+; RUN: 2>&1 | FileCheck %s --check-prefix=LOG
+
+define dso_local void @test_builtin_ppc_fence() {
+; CHECK-LABEL: test_builtin_ppc_fence:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #FENCE
+; CHECK-NEXT: blr
+entry:
+  call void @llvm.ppc.fence()
+  ret void
+}
+declare void @llvm.ppc.fence()
+
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+;
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: motion:%bb.0 entry
+; LOG: ExitSU: FENCE implicit-def dead $rm
+define double @motion(double %a, double %b, double %c, double %d) {
+entry:
+  %0 = fdiv double %a, %b
+  %1 = fdiv double %b, %d
+  call void @llvm.ppc.fence()
+  %2 = fdiv double %c, %d
+  %3 = fdiv double %a, %c
+  call void @llvm.ppc.fence()
+  %4 = fadd double %0, %1
+  %5 = fadd double %2, %3
+  %6 = fsub double %4, %5
+  ret double %6
+}