Skip to content

Commit

Permalink
[Driver][OpenMP] Add specialized action builder for OpenMP offloading…
Browse files Browse the repository at this point in the history
… actions.

Summary:
This patch adds a new specialized action builder to create OpenMP offloading actions. The specialized builder is added to the action builder already containing the CUDA specialized builder.

OpenMP offloading dependences between host and device actions (expressed with OffloadActions) are different from what is used for CUDA:
 - Device compile action depends on the host compile action - the device frontend extracts the information about the declarations that have to be emitted by looking into the metadata produced by the host frontend.
 - The host link action depends on the device link actions - the device images are embedded in the host binary at link time.

Reviewers: echristo, tra, rsmith, jlebar, ABataev, hfinkel

Subscribers: mkuron, whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin

Differential Revision: https://reviews.llvm.org/D21845

llvm-svn: 285314
  • Loading branch information
Samuel Antao committed Oct 27, 2016
1 parent 47c1ff7 commit 28c4f18
Show file tree
Hide file tree
Showing 2 changed files with 261 additions and 6 deletions.
129 changes: 123 additions & 6 deletions clang/lib/Driver/Driver.cpp
Expand Up @@ -1544,8 +1544,9 @@ class OffloadingActionBuilder final {
/// added to the provided host action \a HostAction. By default it is
/// inactive.
virtual ActionBuilderReturnCode
getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase,
phases::ID FinalPhase, PhasesTy &Phases) {
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) {
return ABRT_Inactive;
}

Expand Down Expand Up @@ -1603,8 +1604,9 @@ class OffloadingActionBuilder final {
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {}

ActionBuilderReturnCode
getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase,
phases::ID FinalPhase, PhasesTy &Phases) override {
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {
if (!IsActive)
return ABRT_Inactive;

Expand Down Expand Up @@ -1828,7 +1830,118 @@ class OffloadingActionBuilder final {
}
};

/// Add the implementation for other specialized builders here.
/// OpenMP action builder. The host bitcode is passed to the device frontend
/// and all the device linked images are passed to the host link phase.
class OpenMPActionBuilder final : public DeviceActionBuilder {
/// The OpenMP actions for the current input.
ActionList OpenMPDeviceActions;

/// The linker inputs obtained for each toolchain.
SmallVector<ActionList, 8> DeviceLinkerInputs;

public:
OpenMPActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_OpenMP) {}

ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {

// We should always have an action for each input.
assert(OpenMPDeviceActions.size() == ToolChains.size() &&
"Number of OpenMP actions and toolchains do not match.");

// The host only depends on device action in the linking phase, when all
// the device images have to be embedded in the host image.
if (CurPhase == phases::Link) {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");
auto LI = DeviceLinkerInputs.begin();
for (auto *A : OpenMPDeviceActions) {
LI->push_back(A);
++LI;
}

// We passed the device action as a host dependence, so we don't need to
// do anything else with them.
OpenMPDeviceActions.clear();
return ABRT_Success;
}

// By default, we produce an action for each device arch.
for (Action *&A : OpenMPDeviceActions)
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);

return ABRT_Success;
}

ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {

// If this is an input action replicate it for each OpenMP toolchain.
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
OpenMPDeviceActions.clear();
for (unsigned I = 0; I < ToolChains.size(); ++I)
OpenMPDeviceActions.push_back(
C.MakeAction<InputAction>(IA->getInputArg(), IA->getType()));
return ABRT_Success;
}

// When generating code for OpenMP we use the host compile phase result as
// a dependence to the device compile phase so that it can learn what
// declarations should be emitted. However, this is not the only use for
// the host action, so we prevent it from being collapsed.
if (isa<CompileJobAction>(HostAction)) {
HostAction->setCannotBeCollapsedWithNextDependentAction();
assert(ToolChains.size() == OpenMPDeviceActions.size() &&
"Toolchains and device action sizes do not match.");
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, Action::OFK_OpenMP);
auto TC = ToolChains.begin();
for (Action *&A : OpenMPDeviceActions) {
assert(isa<CompileJobAction>(A));
OffloadAction::DeviceDependences DDep;
DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
A = C.MakeAction<OffloadAction>(HDep, DDep);
++TC;
}
}
return ABRT_Success;
}

void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");

// Append a new link action for each device.
auto TC = ToolChains.begin();
for (auto &LI : DeviceLinkerInputs) {
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
Action::OFK_OpenMP);
++TC;
}
}

bool initialize() override {
// Get the OpenMP toolchains. If we don't get any, the action builder will
// know there is nothing to do related to OpenMP offloading.
auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE;
++TI)
ToolChains.push_back(TI->second);

DeviceLinkerInputs.resize(ToolChains.size());
return false;
}
};

///
/// TODO: Add the implementation for other specialized builders here.
///

/// Specialized builders being used by this offloading action builder.
SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders;
Expand All @@ -1844,6 +1957,9 @@ class OffloadingActionBuilder final {
// Create a specialized builder for CUDA.
SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));

// Create a specialized builder for OpenMP.
SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));

//
// TODO: Build other specialized builders here.
//
Expand Down Expand Up @@ -1886,7 +2002,8 @@ class OffloadingActionBuilder final {
continue;
}

auto RetCode = SB->getDeviceDepences(DDeps, CurPhase, FinalPhase, Phases);
auto RetCode =
SB->getDeviceDependences(DDeps, CurPhase, FinalPhase, Phases);

// If the builder explicitly says the host action should be ignored,
// we need to increment the variable that tracks the builders that request
Expand Down
138 changes: 138 additions & 0 deletions clang/test/Driver/openmp-offload.c
Expand Up @@ -2,6 +2,11 @@
/// Perform several driver tests for OpenMP offloading
///

// REQUIRES: clang-driver
// REQUIRES: x86-registered-target
// REQUIRES: powerpc-registered-target
// REQUIRES: nvptx-registered-target

/// ###########################################################################

/// Check whether an invalid OpenMP target is specified:
Expand Down Expand Up @@ -35,3 +40,136 @@
// RUN: %clang -### -ccc-print-phases -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s
// CHK-DUPLICATES: warning: The OpenMP offloading target 'powerpc64le-ibm-linux-gnu' is similar to target 'powerpc64le-ibm-linux-gnu' already specified - will be ignored.

/// ###########################################################################

/// Check the phases graph when using a single target, different from the host.
/// We should have an offload action joining the host compile and device
/// preprocessor and another one joining the device linking outputs to the host
/// action.
// RUN: %clang -ccc-print-phases -fopenmp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASES %s
// CHK-PHASES: 0: input, "[[INPUT:.+\.c]]", c, (host-openmp)
// CHK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp)
// CHK-PHASES: 2: compiler, {1}, ir, (host-openmp)
// CHK-PHASES: 3: backend, {2}, assembler, (host-openmp)
// CHK-PHASES: 4: assembler, {3}, object, (host-openmp)
// CHK-PHASES: 5: linker, {4}, image, (host-openmp)
// CHK-PHASES: 6: input, "[[INPUT]]", c, (device-openmp)
// CHK-PHASES: 7: preprocessor, {6}, cpp-output, (device-openmp)
// CHK-PHASES: 8: compiler, {7}, ir, (device-openmp)
// CHK-PHASES: 9: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir
// CHK-PHASES: 10: backend, {9}, assembler, (device-openmp)
// CHK-PHASES: 11: assembler, {10}, object, (device-openmp)
// CHK-PHASES: 12: linker, {11}, image, (device-openmp)
// CHK-PHASES: 13: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {5}, "device-openmp (x86_64-pc-linux-gnu)" {12}, image

/// ###########################################################################

/// Check the phases when using multiple targets. Here we also add a library to
/// make sure it is treated as input by the device.
// RUN: %clang -ccc-print-phases -lsomelib -fopenmp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASES-LIB %s
// CHK-PHASES-LIB: 0: input, "somelib", object, (host-openmp)
// CHK-PHASES-LIB: 1: input, "[[INPUT:.+\.c]]", c, (host-openmp)
// CHK-PHASES-LIB: 2: preprocessor, {1}, cpp-output, (host-openmp)
// CHK-PHASES-LIB: 3: compiler, {2}, ir, (host-openmp)
// CHK-PHASES-LIB: 4: backend, {3}, assembler, (host-openmp)
// CHK-PHASES-LIB: 5: assembler, {4}, object, (host-openmp)
// CHK-PHASES-LIB: 6: linker, {0, 5}, image, (host-openmp)
// CHK-PHASES-LIB: 7: input, "somelib", object, (device-openmp)
// CHK-PHASES-LIB: 8: input, "[[INPUT]]", c, (device-openmp)
// CHK-PHASES-LIB: 9: preprocessor, {8}, cpp-output, (device-openmp)
// CHK-PHASES-LIB: 10: compiler, {9}, ir, (device-openmp)
// CHK-PHASES-LIB: 11: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {10}, ir
// CHK-PHASES-LIB: 12: backend, {11}, assembler, (device-openmp)
// CHK-PHASES-LIB: 13: assembler, {12}, object, (device-openmp)
// CHK-PHASES-LIB: 14: linker, {7, 13}, image, (device-openmp)
// CHK-PHASES-LIB: 15: input, "somelib", object, (device-openmp)
// CHK-PHASES-LIB: 16: input, "[[INPUT]]", c, (device-openmp)
// CHK-PHASES-LIB: 17: preprocessor, {16}, cpp-output, (device-openmp)
// CHK-PHASES-LIB: 18: compiler, {17}, ir, (device-openmp)
// CHK-PHASES-LIB: 19: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {18}, ir
// CHK-PHASES-LIB: 20: backend, {19}, assembler, (device-openmp)
// CHK-PHASES-LIB: 21: assembler, {20}, object, (device-openmp)
// CHK-PHASES-LIB: 22: linker, {15, 21}, image, (device-openmp)
// CHK-PHASES-LIB: 23: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {6}, "device-openmp (x86_64-pc-linux-gnu)" {14}, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image


/// ###########################################################################

/// Check the phases when using multiple targets and multiple source files
// RUN: echo " " > %t.c
// RUN: %clang -ccc-print-phases -lsomelib -fopenmp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s %t.c 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASES-FILES %s
// CHK-PHASES-FILES: 0: input, "somelib", object, (host-openmp)
// CHK-PHASES-FILES: 1: input, "[[INPUT1:.+\.c]]", c, (host-openmp)
// CHK-PHASES-FILES: 2: preprocessor, {1}, cpp-output, (host-openmp)
// CHK-PHASES-FILES: 3: compiler, {2}, ir, (host-openmp)
// CHK-PHASES-FILES: 4: backend, {3}, assembler, (host-openmp)
// CHK-PHASES-FILES: 5: assembler, {4}, object, (host-openmp)
// CHK-PHASES-FILES: 6: input, "[[INPUT2:.+\.c]]", c, (host-openmp)
// CHK-PHASES-FILES: 7: preprocessor, {6}, cpp-output, (host-openmp)
// CHK-PHASES-FILES: 8: compiler, {7}, ir, (host-openmp)
// CHK-PHASES-FILES: 9: backend, {8}, assembler, (host-openmp)
// CHK-PHASES-FILES: 10: assembler, {9}, object, (host-openmp)
// CHK-PHASES-FILES: 11: linker, {0, 5, 10}, image, (host-openmp)
// CHK-PHASES-FILES: 12: input, "somelib", object, (device-openmp)
// CHK-PHASES-FILES: 13: input, "[[INPUT1]]", c, (device-openmp)
// CHK-PHASES-FILES: 14: preprocessor, {13}, cpp-output, (device-openmp)
// CHK-PHASES-FILES: 15: compiler, {14}, ir, (device-openmp)
// CHK-PHASES-FILES: 16: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {15}, ir
// CHK-PHASES-FILES: 17: backend, {16}, assembler, (device-openmp)
// CHK-PHASES-FILES: 18: assembler, {17}, object, (device-openmp)
// CHK-PHASES-FILES: 19: input, "[[INPUT2]]", c, (device-openmp)
// CHK-PHASES-FILES: 20: preprocessor, {19}, cpp-output, (device-openmp)
// CHK-PHASES-FILES: 21: compiler, {20}, ir, (device-openmp)
// CHK-PHASES-FILES: 22: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {21}, ir
// CHK-PHASES-FILES: 23: backend, {22}, assembler, (device-openmp)
// CHK-PHASES-FILES: 24: assembler, {23}, object, (device-openmp)
// CHK-PHASES-FILES: 25: linker, {12, 18, 24}, image, (device-openmp)
// CHK-PHASES-FILES: 26: input, "somelib", object, (device-openmp)
// CHK-PHASES-FILES: 27: input, "[[INPUT1]]", c, (device-openmp)
// CHK-PHASES-FILES: 28: preprocessor, {27}, cpp-output, (device-openmp)
// CHK-PHASES-FILES: 29: compiler, {28}, ir, (device-openmp)
// CHK-PHASES-FILES: 30: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {29}, ir
// CHK-PHASES-FILES: 31: backend, {30}, assembler, (device-openmp)
// CHK-PHASES-FILES: 32: assembler, {31}, object, (device-openmp)
// CHK-PHASES-FILES: 33: input, "[[INPUT2]]", c, (device-openmp)
// CHK-PHASES-FILES: 34: preprocessor, {33}, cpp-output, (device-openmp)
// CHK-PHASES-FILES: 35: compiler, {34}, ir, (device-openmp)
// CHK-PHASES-FILES: 36: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (powerpc64-ibm-linux-gnu)" {35}, ir
// CHK-PHASES-FILES: 37: backend, {36}, assembler, (device-openmp)
// CHK-PHASES-FILES: 38: assembler, {37}, object, (device-openmp)
// CHK-PHASES-FILES: 39: linker, {26, 32, 38}, image, (device-openmp)
// CHK-PHASES-FILES: 40: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {11}, "device-openmp (x86_64-pc-linux-gnu)" {25}, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image

/// ###########################################################################

/// Check the phases graph when using a single GPU target, and check the OpenMP
/// and CUDA phases are articulated correctly.
// RUN: %clang -ccc-print-phases -fopenmp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -x cuda %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASES-WITH-CUDA %s
// CHK-PHASES-WITH-CUDA: 0: input, "[[INPUT:.+\.c]]", cuda, (host-cuda-openmp)
// CHK-PHASES-WITH-CUDA: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda-openmp)
// CHK-PHASES-WITH-CUDA: 2: compiler, {1}, ir, (host-cuda-openmp)
// CHK-PHASES-WITH-CUDA: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_20)
// CHK-PHASES-WITH-CUDA: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20)
// CHK-PHASES-WITH-CUDA: 5: compiler, {4}, ir, (device-cuda, sm_20)
// CHK-PHASES-WITH-CUDA: 6: backend, {5}, assembler, (device-cuda, sm_20)
// CHK-PHASES-WITH-CUDA: 7: assembler, {6}, object, (device-cuda, sm_20)
// CHK-PHASES-WITH-CUDA: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {7}, object
// CHK-PHASES-WITH-CUDA: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {6}, assembler
// CHK-PHASES-WITH-CUDA: 10: linker, {8, 9}, cuda-fatbin, (device-cuda)
// CHK-PHASES-WITH-CUDA: 11: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir
// CHK-PHASES-WITH-CUDA: 12: backend, {11}, assembler, (host-cuda-openmp)
// CHK-PHASES-WITH-CUDA: 13: assembler, {12}, object, (host-cuda-openmp)
// CHK-PHASES-WITH-CUDA: 14: linker, {13}, image, (host-cuda-openmp)
// CHK-PHASES-WITH-CUDA: 15: input, "[[INPUT]]", cuda, (device-openmp)
// CHK-PHASES-WITH-CUDA: 16: preprocessor, {15}, cuda-cpp-output, (device-openmp)
// CHK-PHASES-WITH-CUDA: 17: compiler, {16}, ir, (device-openmp)
// CHK-PHASES-WITH-CUDA: 18: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {17}, ir
// CHK-PHASES-WITH-CUDA: 19: backend, {18}, assembler, (device-openmp)
// CHK-PHASES-WITH-CUDA: 20: assembler, {19}, object, (device-openmp)
// CHK-PHASES-WITH-CUDA: 21: linker, {20}, image, (device-openmp)
// CHK-PHASES-WITH-CUDA: 22: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {14}, "device-openmp (nvptx64-nvidia-cuda)" {21}, image

0 comments on commit 28c4f18

Please sign in to comment.