From 457ddd311a164b31c7ef431abd4fd5dba84683f4 Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Wed, 31 May 2017 18:20:17 +0000 Subject: [PATCH] [PowerPC] Correctly specify the cache line size for Power 7, 8 and 9. Fixes PPCTTIImpl::getCacheLineSize() returning the wrong cache line size for newer ppc processors. Commiting on behalf of Stefan Pintilie. Differential Revision: https://reviews.llvm.org/D33656 llvm-svn: 304317 --- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 15 ++++-- .../PowerPC/ppc64-get-cache-line-size.ll | 49 +++++++++++++++++++ 2 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/ppc64-get-cache-line-size.ll diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 83768da3a9a42..5559cdc5fe467 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -244,9 +244,18 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) { } unsigned PPCTTIImpl::getCacheLineSize() { - // This is currently only used for the data prefetch pass which is only - // enabled for BG/Q by default. - return CacheLineSize; + // Check first if the user specified a custom line size. + if (CacheLineSize.getNumOccurrences() > 0) + return CacheLineSize; + + // On P7, P8 or P9 we have a cache line size of 128. + unsigned Directive = ST->getDarwinDirective(); + if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || + Directive == PPC::DIR_PWR9) + return 128; + + // On other processors return a default of 64 bytes. + return 64; } unsigned PPCTTIImpl::getPrefetchDistance() { diff --git a/llvm/test/CodeGen/PowerPC/ppc64-get-cache-line-size.ll b/llvm/test/CodeGen/PowerPC/ppc64-get-cache-line-size.ll new file mode 100644 index 0000000000000..7ca5332865caa --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppc64-get-cache-line-size.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-ppc-prefetching=true | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-ppc-prefetching=true -ppc-loop-prefetch-cache-line=64 | FileCheck %s -check-prefix=CHECK-DCBT +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -enable-ppc-prefetching=true | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -enable-ppc-prefetching=true -ppc-loop-prefetch-cache-line=64 | FileCheck %s -check-prefix=CHECK-DCBT +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -enable-ppc-prefetching=true | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -enable-ppc-prefetching=true -ppc-loop-prefetch-cache-line=64 | FileCheck %s -check-prefix=CHECK-DCBT +; RUN: llc < %s -march=ppc64 -mcpu=a2 -enable-ppc-prefetching=true | FileCheck %s -check-prefix=CHECK-DCBT + +; Function Attrs: nounwind +define signext i32 @check_cache_line() local_unnamed_addr { +entry: + %call = tail call i32* bitcast (i32* (...)* @magici to i32* ()*)() + %call115 = tail call signext i32 bitcast (i32 (...)* @iter to i32 ()*)() + %cmp16 = icmp sgt i32 %call115, 0 + br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.body, %entry + %res.0.lcssa = phi i32 [ 0, %entry ], [ %add5, %for.body ] + ret i32 %res.0.lcssa + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %res.017 = phi i32 [ %add5, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %call, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %res.017 + %1 = add nuw nsw i64 %indvars.iv, 16 + %arrayidx4 = getelementptr inbounds i32, i32* %call, i64 %1 + %2 = load i32, i32* %arrayidx4, align 4 + %add5 = add nsw i32 %add, %2 + %indvars.iv.next = add nuw i64 %indvars.iv, 1 + %call1 = tail call signext i32 bitcast (i32 (...)* @iter to i32 ()*)() + %3 = sext i32 %call1 to i64 + %cmp = icmp slt i64 %indvars.iv.next, %3 + br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK-LABEL: check_cache_line +; CHECK: dcbt +; CHECK-NOT: dcbt +; CHECK: blr +; CHECK-DCBT-LABEL: check_cache_line +; CHECK-DCBT: dcbt +; CHECK-DCBT: dcbt +; CHECK-DCBT: blr +} + +declare i32* @magici(...) local_unnamed_addr + +declare signext i32 @iter(...) local_unnamed_addr +