Expand Up
@@ -6,11 +6,13 @@
//
// ===----------------------------------------------------------------------===//
//
// This file contains functions to create scalar loops and orchestrate the
// creation of parallel loops as LLVM-IR.
//
// ===----------------------------------------------------------------------===//
#include " polly/CodeGen/LoopGenerators.h"
#include " polly/Options.h"
#include " polly/ScopDetection.h"
#include " llvm/Analysis/LoopInfo.h"
#include " llvm/IR/DataLayout.h"
Expand All
@@ -22,10 +24,36 @@
using namespace llvm ;
using namespace polly ;
// Storage for the parallel-codegen command-line options. Each cl::opt below
// writes its parsed value into the matching variable through cl::location.
int polly::PollyNumThreads;
OMPGeneralSchedulingType polly::PollyScheduling;
int polly::PollyChunkSize;

// Number of threads requested for parallel loops; 0 leaves the choice to the
// runtime. This is the only registration of "polly-num-threads": registering
// the same option name twice is rejected by the LLVM command-line library.
static cl::opt<int, true>
    XPollyNumThreads("polly-num-threads",
                     cl::desc(" Number of threads to use (0 = auto)"),
                     cl::Hidden, cl::location(polly::PollyNumThreads),
                     cl::init(0), cl::cat(PollyCategory));

// Scheduling strategy for parallel OpenMP loops; defaults to Runtime.
static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
    "polly-scheduling",
    cl::desc(" Scheduling type of parallel OpenMP for loops"),
    cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static",
                          " Static scheduling"),
               clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic",
                          " Dynamic scheduling"),
               clEnumValN(OMPGeneralSchedulingType::Guided, "guided",
                          " Guided scheduling"),
               clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime",
                          " Runtime determined (OMP_SCHEDULE)")),
    cl::Hidden, cl::location(polly::PollyScheduling),
    cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional,
    cl::cat(PollyCategory));

// Chunk size handed to the OpenMP runtime calls; defaults to 0.
static cl::opt<int, true>
    XPollyChunkSize("polly-scheduling-chunksize",
                    cl::desc(" Chunksize to use by the OpenMP runtime calls"),
                    cl::Hidden, cl::location(polly::PollyChunkSize),
                    cl::init(0), cl::Optional, cl::cat(PollyCategory));
// We generate a loop of either of the following structures:
//
Expand Down
Expand Up
@@ -147,11 +175,13 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
Value *ParallelLoopGenerator::createParallelLoop (
Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
ValueMapT &Map, BasicBlock::iterator *LoopBody) {
Function *SubFn;
AllocaInst *Struct = storeValuesIntoStruct (UsedValues);
BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint ();
Value *IV = createSubFn (Stride, Struct, UsedValues, Map, &SubFn);
Value *IV;
Function *SubFn;
std::tie (IV, SubFn) = createSubFn (Stride, Struct, UsedValues, Map);
*LoopBody = Builder.GetInsertPoint ();
Builder.SetInsertPoint (&*BeforeLoop);
Expand All
@@ -162,102 +192,15 @@ Value *ParallelLoopGenerator::createParallelLoop(
// whereas the codegenForSequential function creates a <= comparison.
UB = Builder.CreateAdd (UB, ConstantInt::get (LongType, 1 ));
// Tell the runtime we start a parallel loop
createCallSpawnThreads (SubFn, SubFnParam, LB, UB, Stride);
Builder.CreateCall (SubFn, SubFnParam);
createCallJoinThreads ();
// Execute the prepared subfunction in parallel.
deployParallelExecution (SubFn, SubFnParam, LB, UB, Stride);
return IV;
}
/// Emit a call to GOMP_parallel_loop_runtime_start at the current insert
/// point.
///
/// The runtime function is looked up in the module and declared with external
/// linkage if it is not present yet.
///
/// @param SubFn      The function handed to the runtime; the declared
///                   parameter type is a pointer to `void (i8 *)`.
/// @param SubFnParam The `i8 *` argument forwarded alongside SubFn.
/// @param LB         Lower bound, of type LongType.
/// @param UB         Upper bound, of type LongType.
/// @param Stride     Loop stride, of type LongType.
void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn,
                                                   Value *SubFnParam, Value *LB,
                                                   Value *UB, Value *Stride) {
  // The name has to match the symbol exported by the runtime exactly; any
  // stray character (e.g. a leading blank) would declare an unrelated,
  // unresolvable function.
  const std::string Name = "GOMP_parallel_loop_runtime_start";
  Function *F = M->getFunction(Name);

  // If F is not available, declare it.
  if (!F) {
    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;

    Type *Params[] = {PointerType::getUnqual(FunctionType::get(
                          Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
                      Builder.getInt8PtrTy(),
                      Builder.getInt32Ty(),
                      LongType,
                      LongType,
                      LongType};

    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
    F = Function::Create(Ty, Linkage, Name, M);
  }

  // The thread count comes from the polly-num-threads option (0 = auto).
  Value *NumberOfThreads = Builder.getInt32(PollyNumThreads);
  Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride};

  Builder.CreateCall(F, Args);
}
/// Emit a call to GOMP_loop_runtime_next at the current insert point.
///
/// The runtime function is looked up in the module and declared with external
/// linkage (returning i8, taking two pointers to LongType) if it is not
/// present yet.
///
/// @param LBPtr Pointer to a LongType slot passed as the first argument.
/// @param UBPtr Pointer to a LongType slot passed as the second argument.
///
/// @return An i1 value that is true iff the runtime call returned a non-zero
///         result.
Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr,
                                                    Value *UBPtr) {
  // The name has to match the symbol exported by the runtime exactly.
  const std::string Name = "GOMP_loop_runtime_next";
  Function *F = M->getFunction(Name);

  // If F is not available, declare it.
  if (!F) {
    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
    Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
    FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
    F = Function::Create(Ty, Linkage, Name, M);
  }

  Value *Args[] = {LBPtr, UBPtr};
  Value *Return = Builder.CreateCall(F, Args);

  // Turn the i8 result into a boolean by comparing it against a
  // zero-extended 'false'.
  Return = Builder.CreateICmpNE(
      Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
  return Return;
}
/// Emit a call to GOMP_parallel_end at the current insert point.
///
/// The runtime function takes no arguments and returns void; it is declared
/// with external linkage if the module does not contain it yet.
void ParallelLoopGenerator::createCallJoinThreads() {
  // The name has to match the symbol exported by the runtime exactly.
  const std::string Name = "GOMP_parallel_end";
  Function *F = M->getFunction(Name);

  // If F is not available, declare it.
  if (!F) {
    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
    F = Function::Create(Ty, Linkage, Name, M);
  }

  Builder.CreateCall(F, {});
}
/// Emit a call to GOMP_loop_end_nowait at the current insert point.
///
/// The runtime function takes no arguments and returns void; it is declared
/// with external linkage if the module does not contain it yet.
void ParallelLoopGenerator::createCallCleanupThread() {
  // The name has to match the symbol exported by the runtime exactly.
  const std::string Name = "GOMP_loop_end_nowait";
  Function *F = M->getFunction(Name);

  // If F is not available, declare it.
  if (!F) {
    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
    F = Function::Create(Ty, Linkage, Name, M);
  }

  Builder.CreateCall(F, {});
}
Function *ParallelLoopGenerator::createSubFnDefinition () {
Function *F = Builder.GetInsertBlock ()->getParent ();
std::vector<Type *> Arguments (1 , Builder.getInt8PtrTy ());
FunctionType *FT = FunctionType::get (Builder.getVoidTy (), Arguments, false );
Function *SubFn = Function::Create (FT, Function::InternalLinkage,
F->getName () + " _polly_subfn" , M);
Function *SubFn = prepareSubFnDefinition (F);
// Certain backends (e.g., NVPTX) do not support '.'s in function names.
// Hence, we ensure that all '.'s are replaced by '_'s.
Expand All
@@ -268,9 +211,6 @@ Function *ParallelLoopGenerator::createSubFnDefinition() {
// Do not run any polly pass on the new function.
SubFn->addFnAttr (PollySkipFnAttr);
Function::arg_iterator AI = SubFn->arg_begin ();
AI->setName (" polly.par.userContext" );
return SubFn;
}
Expand Down
Expand Up
@@ -310,71 +250,3 @@ void ParallelLoopGenerator::extractValuesFromStruct(
Map[OldValues[i]] = NewValue;
}
}
/// Populate the subfunction that executes the loop body in parallel.
///
/// The generated function has four blocks besides the loop itself: a setup
/// block that unpacks the user context, a check block that asks the runtime
/// for the next range of iterations, a block that loads the bounds of that
/// range, and an exit block that cleans up and returns.
///
/// @param Stride     Stride of the generated loop.
/// @param StructData Alloca whose type describes the packed user context.
/// @param Data       Values that were packed into the context structure.
/// @param Map        Receives the mapping from old values to the values
///                   extracted inside the subfunction.
/// @param SubFnPtr   Out-parameter; set to the newly created subfunction.
///
/// @return The induction variable of the loop created in the subfunction.
///         On return the builder is positioned at the loop body.
Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData,
                                          SetVector<Value *> Data,
                                          ValueMapT &Map, Function **SubFnPtr) {
  Function *OutlinedFn = createSubFnDefinition();
  LLVMContext &Ctx = OutlinedFn->getContext();

  // Remember the block the builder was emitting into before we switch over.
  BasicBlock *OriginBB = Builder.GetInsertBlock();

  // Lay out the skeleton of the subfunction.
  BasicBlock *SetupBB = BasicBlock::Create(Ctx, " polly.par.setup", OutlinedFn);
  BasicBlock *DoneBB = BasicBlock::Create(Ctx, " polly.par.exit", OutlinedFn);
  BasicBlock *NextChunkBB =
      BasicBlock::Create(Ctx, " polly.par.checkNext", OutlinedFn);
  BasicBlock *BoundsBB =
      BasicBlock::Create(Ctx, " polly.par.loadIVBounds", OutlinedFn);

  DT.addNewBlock(SetupBB, OriginBB);
  DT.addNewBlock(DoneBB, SetupBB);
  DT.addNewBlock(NextChunkBB, SetupBB);
  DT.addNewBlock(BoundsBB, SetupBB);

  // Setup block: reserve the bound slots and unpack the user context.
  Builder.SetInsertPoint(SetupBB);
  Value *LowerSlot =
      Builder.CreateAlloca(LongType, nullptr, " polly.par.LBPtr");
  Value *UpperSlot =
      Builder.CreateAlloca(LongType, nullptr, " polly.par.UBPtr");
  Value *Context =
      Builder.CreateBitCast(&*OutlinedFn->arg_begin(), StructData->getType(),
                            " polly.par.userContext");
  extractValuesFromStruct(Data, StructData->getAllocatedType(), Context, Map);
  Builder.CreateBr(NextChunkBB);

  // Check block: is there another set of iterations to execute?
  Builder.SetInsertPoint(NextChunkBB);
  Value *WorkItem = createCallGetWorkItem(LowerSlot, UpperSlot);
  Value *MoreWork = Builder.CreateTrunc(WorkItem, Builder.getInt1Ty(),
                                        " polly.par.hasNextScheduleBlock");
  Builder.CreateCondBr(MoreWork, BoundsBB, DoneBB);

  // Bounds block: fetch the iteration range for this set of iterations.
  Builder.SetInsertPoint(BoundsBB);
  Value *ChunkLB = Builder.CreateLoad(LowerSlot, " polly.par.LB");
  Value *ChunkUB = Builder.CreateLoad(UpperSlot, " polly.par.UB");

  // OpenMP hands out an exclusive upper bound (<), whereas the sequential
  // loop generation compares with <=, so move the bound down by one.
  ChunkUB = Builder.CreateSub(ChunkUB, ConstantInt::get(LongType, 1),
                              " polly.par.UBAdjusted");

  // Branch back to the check block, then step the insert point back onto that
  // branch so the loop is emitted in front of it.
  Builder.CreateBr(NextChunkBB);
  Builder.SetInsertPoint(&*--Builder.GetInsertPoint());

  BasicBlock *AfterBB;
  Value *InductionVar =
      createLoop(ChunkLB, ChunkUB, Stride, Builder, LI, DT, AfterBB,
                 ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false);

  // createLoop leaves the builder inside the loop; keep that position.
  BasicBlock::iterator BodyPos = Builder.GetInsertPoint();

  // Exit block: terminate this subfunction.
  Builder.SetInsertPoint(DoneBB);
  createCallCleanupThread();
  Builder.CreateRetVoid();

  // Hand the builder back positioned at the loop body.
  Builder.SetInsertPoint(&*BodyPos);
  *SubFnPtr = OutlinedFn;

  return InductionVar;
}