Skip to content
This repository has been archived by the owner on Apr 23, 2020. It is now read-only.

Commit

Permalink
LoadStoreVectorizer: Check TTI for vec reg bit width
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274322 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Jul 1, 2016
1 parent 0b5646a commit d8f310c
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 19 deletions.
2 changes: 1 addition & 1 deletion include/llvm/LinkAllPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ namespace {
(void) llvm::createInstructionSimplifierPass();
(void) llvm::createLoopVectorizePass();
(void) llvm::createSLPVectorizerPass();
(void) llvm::createLoadStoreVectorizerPass(128);
(void) llvm::createLoadStoreVectorizerPass();
(void) llvm::createBBVectorizePass();
(void) llvm::createPartiallyInlineLibCallsPass();
(void) llvm::createScalarizerPass();
Expand Down
2 changes: 1 addition & 1 deletion include/llvm/Transforms/Vectorize.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
// LoadStoreVectorizer - Create vector loads and stores, but leave scalar
// operations.
//
Pass *createLoadStoreVectorizerPass(unsigned VecRegSize = 128);
Pass *createLoadStoreVectorizerPass();

} // End llvm namespace

Expand Down
44 changes: 28 additions & 16 deletions lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,18 @@ class Vectorizer {
AliasAnalysis &AA;
DominatorTree &DT;
ScalarEvolution &SE;
TargetTransformInfo &TTI;
const DataLayout &DL;
IRBuilder<> Builder;
ValueListMap StoreRefs;
ValueListMap LoadRefs;
unsigned VecRegSize;

public:
Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
ScalarEvolution &SE, unsigned VecRegSize)
: F(F), AA(AA), DT(DT), SE(SE), DL(F.getParent()->getDataLayout()),
Builder(SE.getContext()), VecRegSize(VecRegSize) {}
ScalarEvolution &SE, TargetTransformInfo &TTI)
: F(F), AA(AA), DT(DT), SE(SE), TTI(TTI),
DL(F.getParent()->getDataLayout()),
Builder(SE.getContext()) {}

bool run();

Expand Down Expand Up @@ -116,10 +117,8 @@ class Vectorizer {
class LoadStoreVectorizer : public FunctionPass {
public:
static char ID;
unsigned VecRegSize;

LoadStoreVectorizer(unsigned VecRegSize = 128) : FunctionPass(ID),
VecRegSize(VecRegSize) {
LoadStoreVectorizer() : FunctionPass(ID) {
initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
}

Expand All @@ -133,6 +132,7 @@ class LoadStoreVectorizer : public FunctionPass {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.setPreservesCFG();
}
};
Expand All @@ -144,13 +144,14 @@ INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
"Vectorize load and store instructions", false, false);

char LoadStoreVectorizer::ID = 0;

Pass *llvm::createLoadStoreVectorizerPass(unsigned VecRegSize) {
return new LoadStoreVectorizer(VecRegSize);
Pass *llvm::createLoadStoreVectorizerPass() {
return new LoadStoreVectorizer();
}

bool LoadStoreVectorizer::runOnFunction(Function &F) {
Expand All @@ -161,8 +162,10 @@ bool LoadStoreVectorizer::runOnFunction(Function &F) {
AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TargetTransformInfo &TTI
= getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

Vectorizer V(F, AA, DT, SE, VecRegSize);
Vectorizer V(F, AA, DT, SE, TTI);
return V.run();
}

Expand Down Expand Up @@ -440,6 +443,10 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
if (TySize < 8)
continue;

Value *Ptr = LI->getPointerOperand();
unsigned AS = Ptr->getType()->getPointerAddressSpace();
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

// No point in looking at these if they're too big to vectorize.
if (TySize > VecRegSize / 2)
continue;
Expand All @@ -456,8 +463,8 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
// TODO: Target hook to filter types.

// Save the load locations.
Value *Ptr = GetUnderlyingObject(LI->getPointerOperand(), DL);
LoadRefs[Ptr].push_back(LI);
Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
LoadRefs[ObjPtr].push_back(LI);

} else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
if (!SI->isSimple())
Expand All @@ -473,6 +480,9 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
if (TySize < 8)
continue;

Value *Ptr = SI->getPointerOperand();
unsigned AS = Ptr->getType()->getPointerAddressSpace();
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
if (TySize > VecRegSize / 2)
continue;

Expand All @@ -485,8 +495,8 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
continue;

// Save store location.
Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL);
StoreRefs[Ptr].push_back(SI);
Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
StoreRefs[ObjPtr].push_back(SI);
}
}
}
Expand Down Expand Up @@ -592,6 +602,8 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
}

unsigned Sz = DL.getTypeSizeInBits(StoreTy);
unsigned AS = S0->getPointerAddressSpace();
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
unsigned VF = VecRegSize / Sz;
unsigned ChainSize = Chain.size();

Expand Down Expand Up @@ -664,7 +676,6 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {

// Set insert point.
Builder.SetInsertPoint(&*Last);
unsigned AS = S0->getPointerAddressSpace();

Value *Vec = UndefValue::get(VecTy);

Expand Down Expand Up @@ -728,6 +739,8 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
}

unsigned Sz = DL.getTypeSizeInBits(LoadTy);
unsigned AS = L0->getPointerAddressSpace();
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
unsigned VF = VecRegSize / Sz;
unsigned ChainSize = Chain.size();

Expand Down Expand Up @@ -798,7 +811,6 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
// Set insert point.
Builder.SetInsertPoint(&*Last);

unsigned AS = L0->getPointerAddressSpace();
Value *Bitcast =
Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));

Expand Down
51 changes: 51 additions & 0 deletions test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-4 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT4 -check-prefix=ALL %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-8 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT8 -check-prefix=ALL %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-16 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT16 -check-prefix=ALL %s

; Four adjacent i32 stores (element offsets 0..3 from %out) that the
; load-store-vectorizer may merge. The expected store width differs per RUN
; configuration of this file:
;   - max-private-element-size-4  (ELT4 prefix)  expects four scalar i32 stores
;   - max-private-element-size-8  (ELT8 prefix)  expects two <2 x i32> stores
;   - max-private-element-size-16 (ELT16 prefix) expects one <4 x i32> store
; NOTE(review): %out has no explicit addrspace qualifier; the test name implies
; this is treated as private memory for the amdgcn triple -- confirm.
; NOTE(review): the name says "loads" although only stores are exercised.
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32
; ELT4: store i32
; ELT4: store i32
; ELT4: store i32
; ELT4: store i32

; ELT8: store <2 x i32>
; ELT8: store <2 x i32>

; ELT16: store <4 x i32>
define void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 {
; Addresses of the three elements following %out.
%out.gep.1 = getelementptr i32, i32* %out, i32 1
%out.gep.2 = getelementptr i32, i32* %out, i32 2
%out.gep.3 = getelementptr i32, i32* %out, i32 3

; Constant stores to the four consecutive i32 slots (128 bits in total).
store i32 9, i32* %out
store i32 1, i32* %out.gep.1
store i32 23, i32* %out.gep.2
store i32 19, i32* %out.gep.3
ret void
}

; Four adjacent i8 stores (byte offsets 0..3 from %out), 32 bits in total.
; Every RUN configuration of this file expects them to be merged into a single
; <4 x i8> store, i.e. the result does not depend on the
; max-private-element-size-N setting used by the run.
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i8(
; ALL: store <4 x i8>
define void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 {
; Addresses of the three bytes following %out.
%out.gep.1 = getelementptr i8, i8* %out, i32 1
%out.gep.2 = getelementptr i8, i8* %out, i32 2
%out.gep.3 = getelementptr i8, i8* %out, i32 3

; Constant stores to the four consecutive bytes.
store i8 9, i8* %out
store i8 1, i8* %out.gep.1
store i8 23, i8* %out.gep.2
store i8 19, i8* %out.gep.3
ret void
}

; Two adjacent i16 stores (element offsets 0 and 1 from %out), 32 bits in
; total. Every RUN configuration of this file expects them to be merged into a
; single <2 x i16> store.
; NOTE(review): the name contains "4_vector_elts" but only two elements are
; stored here.
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16(
; ALL: store <2 x i16>
define void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 {
; Address of the i16 element following %out.
%out.gep.1 = getelementptr i16, i16* %out, i32 1

; Constant stores to the two consecutive i16 slots.
store i16 9, i16* %out
store i16 12, i16* %out.gep.1
ret void
}

attributes #0 = { nounwind }
3 changes: 2 additions & 1 deletion test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,8 @@ define void @merge_local_store_2_constants_i32_align_2(i32 addrspace(3)* %out) #
}

; CHECK-LABEL: @merge_local_store_4_constants_i32
; CHECK: store <4 x i32> <i32 1234, i32 123, i32 456, i32 333>, <4 x i32> addrspace(3)*
; CHECK: store <2 x i32> <i32 456, i32 333>, <2 x i32> addrspace(3)*
; CHECK: store <2 x i32> <i32 1234, i32 123>, <2 x i32> addrspace(3)*
define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
Expand Down

0 comments on commit d8f310c

Please sign in to comment.