Skip to content

Commit

Permalink
[Refactor] Generalize parallel code generation
Browse files Browse the repository at this point in the history
  + Generalized function names and comments
    + Removed OpenMP (omp) from the names and comments
    + Use common names (non OpenMP specific) for runtime library call creation
      methods
  + Commented the parallel code generator and all its member functions
  + Refactored some values and methods

Differential Revision: http://reviews.llvm.org/D4990

llvm-svn: 219003
  • Loading branch information
Johannes Doerfert committed Oct 3, 2014
1 parent fae1dc8 commit 12b355a
Show file tree
Hide file tree
Showing 11 changed files with 268 additions and 229 deletions.
190 changes: 139 additions & 51 deletions polly/include/polly/CodeGen/LoopGenerators.h
Expand Up @@ -13,10 +13,11 @@
//===----------------------------------------------------------------------===//
#ifndef POLLY_LOOP_GENERATORS_H
#define POLLY_LOOP_GENERATORS_H

#include "polly/CodeGen/IRBuilder.h"
#include "llvm/ADT/SetVector.h"

#include <map>
#include "llvm/IR/ValueMap.h"
#include "llvm/ADT/SetVector.h"

namespace llvm {
class Value;
Expand Down Expand Up @@ -54,73 +55,160 @@ Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride,
ScopAnnotator *Annotator = NULL, bool Parallel = false,
bool UseGuard = true);

class OMPGenerator {
/// @brief The ParallelLoopGenerator allows creating parallelized loops.
///
/// To parallelize a loop, we perform the following steps:
/// o Generate a subfunction which will hold the loop body.
/// o Create a struct to hold all outer values needed in the loop body.
/// o Create calls to a runtime library to achieve the actual parallelism.
/// These calls will spawn and join threads, define how the work (here the
/// iterations) is distributed between them and make sure each has access
/// to the struct holding all needed values.
///
/// At the moment we support only one parallel runtime, OpenMP.
///
/// If we parallelize the outer loop of the following loop nest,
///
/// S0;
/// for (int i = 0; i < N; i++)
/// for (int j = 0; j < M; j++)
/// S1(i, j);
/// S2;
///
/// we will generate the following code (with different runtime function names):
///
/// S0;
/// auto *values = storeValuesIntoStruct();
/// // Execute subfunction with multiple threads
/// spawn_threads(subfunction, values);
/// join_threads();
/// S2;
///
/// // This function is executed in parallel by different threads
/// void subfunction(values) {
/// while (auto *WorkItem = getWorkItem()) {
/// int LB = WorkItem.begin();
/// int UB = WorkItem.end();
/// for (int i = LB; i < UB; i++)
/// for (int j = 0; j < M; j++)
/// S1(i, j);
/// }
/// cleanup_thread();
/// }
class ParallelLoopGenerator {
public:
typedef std::map<Value *, Value *> ValueToValueMapTy;
using ValueToValueMapTy = llvm::ValueMap<Value *, Value *>;

OMPGenerator(PollyIRBuilder &Builder, Pass *P) : Builder(Builder), P(P) {}
/// @brief Create a parallel loop generator for the current function.
ParallelLoopGenerator(PollyIRBuilder &Builder, Pass *P, LoopInfo &LI,
DominatorTree &DT, const DataLayout &DL)
: Builder(Builder), P(P), LI(LI), DT(DT), DL(DL),
LongType(
Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())),
M(Builder.GetInsertBlock()->getParent()->getParent()) {}

/// @brief Create an OpenMP parallel loop.
///
/// @brief Create a parallel loop
///
/// @param LowerBound The starting value of the induction variable.
/// @param UpperBound The upper bound of the induction variable.
/// @param Stride The value by which the induction variable is
/// incremented.
///
/// @param UsedValues A set of LLVM-IR Values that should be available to
/// the new loop body.
/// @param VMap This map is filled by createParallelLoop(). It
/// maps the values in UsedValues to Values through which
/// their content is available within the loop body.
/// @param LoopBody A pointer to an iterator that is set to point to the
/// body of the created loop. It should be used to insert
/// instructions that form the actual loop body.
/// @param LB The lower bound for the loop we parallelize.
/// @param UB The upper bound for the loop we parallelize.
/// @param Stride The stride of the loop we parallelize.
/// @param Values A set of LLVM-IR Values that should be available in
/// the new loop body.
/// @param VMap A map to allow outside access to the new versions of
/// the values in @p Values.
/// @param LoopBody A pointer to an iterator that is set to point to the
/// body of the created loop. It should be used to insert
/// instructions that form the actual loop body.
///
/// @return Value* The newly created induction variable for this loop.
Value *createParallelLoop(Value *LowerBound, Value *UpperBound, Value *Stride,
SetVector<Value *> &UsedValues,
ValueToValueMapTy &VMap,
/// @return The newly created induction variable for this loop.
Value *createParallelLoop(Value *LB, Value *UB, Value *Stride,
SetVector<Value *> &Values, ValueToValueMapTy &VMap,
BasicBlock::iterator *LoopBody);

private:
/// @brief The IR builder we use to create instructions.
PollyIRBuilder &Builder;

/// @brief A pass pointer to update analysis information.
Pass *P;

IntegerType *getIntPtrTy();
Module *getModule();
/// @brief The loop info of the current function we need to update.
LoopInfo &LI;

/// @brief The dominance tree of the current function we need to update.
DominatorTree &DT;

/// @brief The target layout to get the right size for types.
const DataLayout &DL;

void createCallParallelLoopStart(Value *SubFunction, Value *SubfunctionParam,
Value *NumberOfThreads, Value *LowerBound,
Value *UpperBound, Value *Stride);
Value *createCallLoopNext(Value *LowerBoundPtr, Value *UpperBoundPtr);
void createCallParallelEnd();
void createCallLoopEndNowait();
/// @brief The type of a "long" on this hardware used for backend calls.
Type *LongType;

Value *loadValuesIntoStruct(SetVector<Value *> &Values);
void extractValuesFromStruct(SetVector<Value *> OldValues, Value *Struct,
ValueToValueMapTy &Map);
/// @brief The current module
Module *M;

/// @brief Create the OpenMP subfunction.
/// @brief Create a runtime library call to spawn the worker threads.
///
/// @param Stride The value by which the induction variable is
/// incremented.
/// @param Struct The structure that is used to make Values available to
/// the loop body.
/// @param UsedValues A set of LLVM-IR Values that should be available to
/// the new loop body.
/// @param VMap This map that is filled by createSubfunction(). It
/// maps the values in UsedValues to Values through which
/// their content is available within the loop body.
/// @param SubFunction The newly created SubFunction is returned here.
/// @param SubFn The subfunction which holds the loop body.
/// @param SubFnParam The parameter for the subfunction (basically the struct
/// filled with the outside values).
/// @param LB The lower bound for the loop we parallelize.
/// @param UB The upper bound for the loop we parallelize.
/// @param Stride The stride of the loop we parallelize.
void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
Value *UB, Value *Stride);

/// @brief Create a runtime library call to join the worker threads.
void createCallJoinThreads();

/// @brief Create a runtime library call to get the next work item.
///
/// @return Value* The newly created induction variable.
Value *createSubfunction(Value *Stride, Value *Struct,
SetVector<Value *> UsedValues,
ValueToValueMapTy &VMap, Function **SubFunction);
/// @param LBPtr A pointer value to store the work item begin in.
/// @param UBPtr A pointer value to store the work item end in.
///
/// @returns A true value if the work item is not empty.
Value *createCallGetWorkItem(Value *LBPtr, Value *UBPtr);

/// @brief Create the definition of the OpenMP subfunction.
Function *createSubfunctionDefinition();
/// @brief Create a runtime library call to allow cleanup of the thread.
///
/// @note This function is called right before the thread will exit the
/// subfunction and only if the runtime system depends on it.
void createCallCleanupThread();

/// @brief Create a struct for all @p Values and store them in there.
///
/// @param Values The values which should be stored in the struct.
///
/// @return The created struct.
Value *storeValuesIntoStruct(SetVector<Value *> &Values);

/// @brief Extract all values from the @p Struct and construct the mapping.
///
/// @param Values The values which were stored in the struct.
/// @param Struct The struct holding all the values in @p Values.
/// @param VMap A map to associate every element of @p Values with the
/// new llvm value loaded from the @p Struct.
void extractValuesFromStruct(SetVector<Value *> Values, Value *Struct,
ValueToValueMapTy &VMap);

/// @brief Create the definition of the parallel subfunction.
Function *createSubFnDefinition();

/// @brief Create the parallel subfunction.
///
/// @param Stride The induction variable increment.
/// @param Struct A struct holding all values in @p Values.
/// @param Values A set of LLVM-IR Values that should be available in
/// the new loop body.
/// @param VMap A map to allow outside access to the new versions of
/// the values in @p Values.
/// @param SubFn The newly created subfunction is returned here.
///
/// @return The newly created induction variable.
Value *createSubFn(Value *Stride, Value *Struct,
SetVector<Value *> UsedValues, ValueToValueMapTy &VMap,
Function **SubFn);
};
} // end namespace polly
#endif
17 changes: 11 additions & 6 deletions polly/lib/CodeGen/CodeGeneration.cpp
Expand Up @@ -246,6 +246,7 @@ class ClastStmtCodeGen {
LoopInfo &LI;
ScalarEvolution &SE;
DominatorTree &DT;
const DataLayout &DL;

// The Builder specifies the current location to code generate at.
PollyIRBuilder &Builder;
Expand Down Expand Up @@ -316,7 +317,7 @@ class ClastStmtCodeGen {
/// @brief Update ClastVars and ValueMap according to a value map.
///
/// @param VMap A map from old to new values.
void updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap);
void updateWithValueMap(ParallelLoopGenerator::ValueToValueMapTy &VMap);

/// @brief Create an OpenMP parallel for loop.
///
Expand Down Expand Up @@ -579,8 +580,8 @@ SetVector<Value *> ClastStmtCodeGen::getOMPValues(const clast_stmt *Body) {
return Values;
}

void
ClastStmtCodeGen::updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap) {
void ClastStmtCodeGen::updateWithValueMap(
ParallelLoopGenerator::ValueToValueMapTy &VMap) {
std::set<Value *> Inserted;

for (const auto &I : ClastVars) {
Expand Down Expand Up @@ -611,8 +612,8 @@ void ClastStmtCodeGen::codegenForOpenMP(const clast_for *For) {
BasicBlock::iterator LoopBody;
IntegerType *IntPtrTy = getIntPtrTy();
SetVector<Value *> Values;
OMPGenerator::ValueToValueMapTy VMap;
OMPGenerator OMPGen(Builder, P);
ParallelLoopGenerator::ValueToValueMapTy VMap;
ParallelLoopGenerator OMPGen(Builder, P, LI, DT, DL);

Stride = Builder.getInt(APInt_from_MPZ(For->stride));
Stride = Builder.CreateSExtOrBitCast(Stride, IntPtrTy);
Expand Down Expand Up @@ -1025,7 +1026,8 @@ void ClastStmtCodeGen::codegen(const clast_root *r) {
ClastStmtCodeGen::ClastStmtCodeGen(Scop *scop, PollyIRBuilder &B, Pass *P)
: S(scop), P(P), LI(P->getAnalysis<LoopInfo>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()), Builder(B),
DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
DL(P->getAnalysis<DataLayoutPass>().getDataLayout()), Builder(B),
ExpGen(Builder, ClastVars) {}

namespace {
Expand Down Expand Up @@ -1074,9 +1076,11 @@ class CodeGeneration : public ScopPass {
AU.addRequired<ScopDetection>();
AU.addRequired<ScopInfo>();
AU.addRequired<DataLayoutPass>();
AU.addRequired<DataLayoutPass>();
AU.addRequired<LoopInfo>();

AU.addPreserved<CloogInfo>();
AU.addPreserved<DataLayoutPass>();
AU.addPreserved<Dependences>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTreeWrapperPass>();
Expand All @@ -1103,6 +1107,7 @@ INITIALIZE_PASS_DEPENDENCY(CloogInfo);
INITIALIZE_PASS_DEPENDENCY(Dependences);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DataLayoutPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution);
INITIALIZE_PASS_DEPENDENCY(ScopDetection);
INITIALIZE_PASS_DEPENDENCY(DataLayoutPass);
Expand Down

0 comments on commit 12b355a

Please sign in to comment.