67 changes: 62 additions & 5 deletions clang/lib/CodeGen/CGOpenMPRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H

#include "clang/AST/Type.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
Expand Down Expand Up @@ -92,6 +93,13 @@ class CGOpenMPRuntime {
OMPRTL__kmpc_single,
// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
OMPRTL__kmpc_end_single,
// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
OMPRTL__kmpc_omp_task_alloc,
// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
// new_task);
OMPRTL__kmpc_omp_task,
};

/// \brief Values for bit flags used in the ident_t to describe the fields.
Expand Down Expand Up @@ -190,6 +198,12 @@ class CGOpenMPRuntime {
/// variables.
llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator>
InternalVars;
/// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
llvm::Type *KmpRoutineEntryPtrTy;
QualType KmpRoutineEntryPtrQTy;

/// \brief Build type kmp_routine_entry_t (if not built yet).
void emitKmpRoutineEntryT(QualType KmpInt32Ty);

/// \brief Emits object of ident_t type with info for source location.
/// \param Flags Flags for OpenMP location.
Expand Down Expand Up @@ -257,24 +271,34 @@ class CGOpenMPRuntime {
explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {}

/// \brief Emits outlined function for the specified OpenMP directive \a D
/// (required for parallel and task directives). This outlined function has
/// type void(*)(kmp_int32 /*ThreadID*/, kmp_int32 /*BoundID*/, struct
/// context_vars*).
/// \brief Emits outlined function for the specified OpenMP directive \a D.
/// This outlined function has type void(*)(kmp_int32 *ThreadID, kmp_int32
/// BoundID, struct context_vars*).
/// \param D OpenMP directive.
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
///
virtual llvm::Value *emitOutlinedFunction(const OMPExecutableDirective &D,
const VarDecl *ThreadIDVar);

/// \brief Emits outlined function for the OpenMP task directive \a D. This
/// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32
/// PartID, struct context_vars*).
/// \param D OpenMP directive.
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
/// \param PartID If not nullptr - variable used for part id in tasks.
///
virtual llvm::Value *emitTaskOutlinedFunction(const OMPExecutableDirective &D,
const VarDecl *ThreadIDVar,
const VarDecl *PartIDVar);

/// \brief Cleans up references to the objects in finished function.
///
void functionFinished(CodeGenFunction &CGF);

/// \brief Emits code for parallel call of the \a OutlinedFn with variables
/// captured in a record which address is stored in \a CapturedStruct.
/// \param OutlinedFn Outlined function to be run in parallel threads. Type of
/// this function is void(*)(kmp_int32, kmp_int32, struct context_vars*).
/// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
/// \param CapturedStruct A pointer to the record with the references to
/// variables used in \a OutlinedFn function.
///
Expand Down Expand Up @@ -412,6 +436,39 @@ class CGOpenMPRuntime {
/// \param Vars List of variables to flush.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
SourceLocation Loc);

/// \brief Emit task region for the task directive. The task region is
/// emmitted in several steps:
/// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
/// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
/// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
/// function:
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
/// TaskFunction(gtid, tt->part_id, tt->shareds);
/// return 0;
/// }
/// 2. Copy a list of shared variables to field shareds of the resulting
/// structure kmp_task_t returned by the previous call (if any).
/// 3. Copy a pointer to destructions function to field destructions of the
/// resulting structure kmp_task_t.
/// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
/// kmp_task_t *new_task), where new_task is a resulting structure from
/// previous items.
/// \param Tied true if the task is tied (the task is tied to the thread that
/// can suspend its task region), false - untied (the task is not tied to any
/// thread).
/// \param Final Contains either constant bool value, or llvm::Value * of i1
/// type for final clause. If the value is true, the task forces all of its
/// child tasks to become final and included tasks.
/// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
/// /*part_id*/, captured_struct */*__context*/);
/// \param SharedsTy A type which contains references the shared variables.
/// \param Shareds Context with the list of shared variables from the \a
/// TaskFunction.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
llvm::Value *TaskFunction, QualType SharedsTy,
llvm::Value *Shareds);
};

/// \brief RAII for emitting code of CapturedStmt without function outlining.
Expand Down
31 changes: 29 additions & 2 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -717,8 +717,35 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
// Emit outlined function for task construct.
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
auto *I = CS->getCapturedDecl()->param_begin();
// The first function argument for tasks is a thread id, the second one is a
// part id (0 for tied tasks, >=0 for untied task).
auto OutlinedFn =
CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, *std::next(I));
// Check if we should emit tied or untied task.
bool Tied = !S.getSingleClause(OMPC_untied);
// Check if the task is final
llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
if (auto *Clause = S.getSingleClause(OMPC_final)) {
// If the condition constant folds and can be elided, try to avoid emitting
// the condition and the dead arm of the if/else.
auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
bool CondConstant;
if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
Final.setInt(CondConstant);
else
Final.setPointer(EvaluateExprAsBool(Cond));
} else {
// By default the task is not final.
Final.setInt(/*IntVal=*/false);
}
auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
OutlinedFn, SharedsTy, CapturedStruct);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
Expand Down
11 changes: 9 additions & 2 deletions clang/lib/Sema/SemaOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,7 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator Iter,
// bound to the current team is shared.
if (DVar.DKind == OMPD_task) {
DSAVarData DVarTemp;
for (StackTy::reverse_iterator I = std::next(Iter),
EE = std::prev(Stack.rend());
for (StackTy::reverse_iterator I = std::next(Iter), EE = Stack.rend();
I != EE; ++I) {
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables
// Referenced
Expand Down Expand Up @@ -1122,11 +1121,19 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
break;
}
case OMPD_task: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
Sema::CapturedParamNameType Params[] = {
std::make_pair(".global_tid.", KmpInt32Ty),
std::make_pair(".part_id.", KmpInt32Ty),
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
Params);
// Mark this captured region as inlined, because we don't use outlined
// function directly.
getCurCapturedRegion()->TheCapturedDecl->addAttr(
AlwaysInlineAttr::CreateImplicit(
Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
break;
}
case OMPD_ordered: {
Expand Down
102 changes: 102 additions & 0 deletions clang/test/OpenMP/task_codegen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c++ -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// expected-no-diagnostics

#ifndef HEADER
#define HEADER

// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* }
// CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [[STRUCT_S:%.+]]* }
// CHECK-DAG: [[KMP_TASK_T:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)* }
struct S {
int a;
S() : a(0) {}
S(const S &s) : a(s.a) {}
~S() {}
};
int a;
// CHECK-LABEL : @main
int main() {
// CHECK: [[B:%.+]] = alloca i8
// CHECK: [[S:%.+]] = alloca [[STRUCT_S]]
char b;
S s;
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* @{{.+}})
// CHECK: [[B_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES:%.+]], i32 0, i32 0
// CHECK: store i8* [[B]], i8** [[B_REF]]
// CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES]], i32 0, i32 1
// CHECK: store [[STRUCT_S]]* [[S]], [[STRUCT_S]]** [[S_REF]]
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*))
// CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]]
// CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 16, i32 8, i1 false)
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
#pragma omp task shared(a, b, s)
{
a = 15;
b = a;
s.a = 10;
}
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
#pragma omp task untied
{
a = 1;
}
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
#pragma omp task final(true)
{
a = 2;
}
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.*}}, i32 0, i32 3
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
const bool flag = false;
#pragma omp task final(flag)
{
a = 3;
}
// CHECK: [[B_VAL:%.+]] = load i8, i8* [[B]]
// CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0
// CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0
// CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1
// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 [[FLAGS]], i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*))
// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
#pragma omp task final(b)
{
a = 4;
}
return a;
}
// CHECK: define internal i32 [[TASK_ENTRY1]](i32, [[KMP_TASK_T]]{{.*}}*)
// CHECK: store i32 15, i32* [[A_PTR:@.+]]
// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]]
// CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8
// CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}}
// CHECK: store i32 10, i32* %{{.+}}

// CHECK: define internal i32 [[TASK_ENTRY2]](i32, [[KMP_TASK_T]]{{.*}}*)
// CHECK: store i32 1, i32* [[A_PTR:@.+]]

// CHECK: define internal i32 [[TASK_ENTRY3]](i32, [[KMP_TASK_T]]{{.*}}*)
// CHECK: store i32 2, i32* [[A_PTR:@.+]]

// CHECK: define internal i32 [[TASK_ENTRY4]](i32, [[KMP_TASK_T]]{{.*}}*)
// CHECK: store i32 3, i32* [[A_PTR:@.+]]

// CHECK: define internal i32 [[TASK_ENTRY5]](i32, [[KMP_TASK_T]]{{.*}}*)
// CHECK: store i32 4, i32* [[A_PTR:@.+]]
#endif