Skip to content

Commit

Permalink
Optional calls to dump LLVM IR pass execution timing info
Browse files Browse the repository at this point in the history
Helps find compilation-time bottlenecks.
  • Loading branch information
Henry Linjamäki authored and pjaaskel committed Oct 12, 2019
1 parent cac6cd3 commit e8973c5
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 8 deletions.
15 changes: 7 additions & 8 deletions lib/CL/devices/hsa/pocl-hsa.c
Expand Up @@ -1761,14 +1761,6 @@ pocl_hsa_launch (pocl_hsa_device_data_t *d, cl_event event)
a single-WI WG. */
kernel_packet->workgroup_size_x = kernel_packet->workgroup_size_y =
kernel_packet->workgroup_size_z = 1;

if (d->device->device_side_printf)
{
pc->printf_buffer = d->printf_buffer;
pc->printf_buffer_capacity = d->device->printf_buffer_size;
bzero (d->printf_write_pos, sizeof (size_t));
pc->printf_buffer_position = d->printf_write_pos;
}
}
else
{
Expand All @@ -1779,6 +1771,13 @@ pocl_hsa_launch (pocl_hsa_device_data_t *d, cl_event event)
kernel_packet->workgroup_size_z = run_cmd->pc.local_size[2];
}

if (d->device->device_side_printf)
{
pc->printf_buffer = d->printf_buffer;
pc->printf_buffer_capacity = d->device->printf_buffer_size;
bzero (d->printf_write_pos, sizeof (size_t));
pc->printf_buffer_position = d->printf_write_pos;
}

/* TODO: Dynamic WG sizes. */

Expand Down
32 changes: 32 additions & 0 deletions lib/CL/pocl_llvm_wg.cc
Expand Up @@ -61,10 +61,16 @@ IGNORE_COMPILER_WARNING("-Wunused-parameter")
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/PassTimingInfo.h>

#define PassManager legacy::PassManager

#include "linker.h"

// Define this to dump the LLVM pass execution timing report to the console
// after each work-group IR function generation and after each codegen
// (object file / assembly) pass manager run.
// #define DUMP_LLVM_PASS_TIMINGS

using namespace llvm;

/**
Expand Down Expand Up @@ -405,9 +411,23 @@ int pocl_llvm_generate_workgroup_function_nowrite(

KernelName = Kernel->name;

#ifdef DUMP_LLVM_PASS_TIMINGS
llvm::TimePassesIsEnabled = true;
#endif
POCL_MEASURE_START(llvm_workgroup_ir_func_gen);
#ifdef LLVM_OLDER_THAN_3_7
kernel_compiler_passes(Device, ParallelBC,
ParallelBC->getDataLayout()->getStringRepresentation())
.run(*ParallelBC);
#else
kernel_compiler_passes(Device, ParallelBC,
ParallelBC->getDataLayout().getStringRepresentation())
.run(*ParallelBC);
#endif
POCL_MEASURE_FINISH(llvm_workgroup_ir_func_gen);
#ifdef DUMP_LLVM_PASS_TIMINGS
llvm::reportAndResetTimings();
#endif

assert(Output != NULL);
*Output = (void *)ParallelBC;
Expand Down Expand Up @@ -588,7 +608,13 @@ int pocl_llvm_codegen(cl_device_id Device, void *Modp, char **Output,

if (LLVMGeneratesObjectFiles) {
POCL_MSG_PRINT_LLVM("Generating an object file directly.\n");
#ifdef DUMP_LLVM_PASS_TIMINGS
llvm::TimePassesIsEnabled = true;
#endif
PMObj.run(*Input);
#ifdef DUMP_LLVM_PASS_TIMINGS
llvm::reportAndResetTimings();
#endif
std::string O = SOS.str(); // flush
const char *Cstr = O.c_str();
size_t S = O.size();
Expand Down Expand Up @@ -618,8 +644,14 @@ int pocl_llvm_codegen(cl_device_id Device, void *Modp, char **Output,
}
#endif

#ifdef DUMP_LLVM_PASS_TIMINGS
llvm::TimePassesIsEnabled = true;
#endif
// This produces the assembly text:
PMAsm.run(*Input);
#ifdef DUMP_LLVM_PASS_TIMINGS
llvm::reportAndResetTimings();
#endif

// Next call the target's assembler via the Toolchain API indirectly through
// the Driver API.
Expand Down

0 comments on commit e8973c5

Please sign in to comment.