diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp index 117ed94a1da6f..b55509c2f28ff 100644 --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -231,10 +231,9 @@ struct RecordReplayTy { OS.close(); } - void saveKernelInputInfo(const char *Name, DeviceImageTy &Image, - void **ArgPtrs, ptrdiff_t *ArgOffsets, - int32_t NumArgs, uint64_t NumTeamsClause, - uint32_t ThreadLimitClause, uint64_t LoopTripCount) { + void saveKernelDescr(const char *Name, void **ArgPtrs, ptrdiff_t *ArgOffsets, + int32_t NumArgs, uint64_t NumTeamsClause, + uint32_t ThreadLimitClause, uint64_t LoopTripCount) { json::Object JsonKernelInfo; JsonKernelInfo["Name"] = Name; JsonKernelInfo["NumArgs"] = NumArgs; @@ -255,12 +254,6 @@ struct RecordReplayTy { JsonArgOffsets.push_back(ArgOffsets[I]); JsonKernelInfo["ArgOffsets"] = json::Value(std::move(JsonArgOffsets)); - SmallString<128> MemoryFilename = {Name, ".memory"}; - dumpDeviceMemory(MemoryFilename); - - SmallString<128> GlobalsFilename = {Name, ".globals"}; - dumpGlobals(GlobalsFilename, Image); - SmallString<128> JsonFilename = {Name, ".json"}; std::error_code EC; raw_fd_ostream JsonOS(JsonFilename.str(), EC); @@ -271,6 +264,14 @@ struct RecordReplayTy { JsonOS.close(); } + void saveKernelInput(const char *Name, DeviceImageTy &Image) { + SmallString<128> GlobalsFilename = {Name, ".globals"}; + dumpGlobals(GlobalsFilename, Image); + + SmallString<128> MemoryFilename = {Name, ".memory"}; + dumpDeviceMemory(MemoryFilename); + } + void saveKernelOutputInfo(const char *Name) { SmallString<128> OutputFilename = { Name, (isRecording() ? 
".original.output" : ".replay.output")}; @@ -504,12 +505,6 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs, printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks)) return Err; - if (RecordReplay.isRecording()) - RecordReplay.saveKernelInputInfo( - getName(), getImage(), ArgPtrs, ArgOffsets, - KernelArgs.NumArgs - /* KernelLaunchEnvironment */ 1, NumBlocks, - NumThreads, KernelArgs.Tripcount); - return launchImpl(GenericDevice, NumThreads, NumBlocks, KernelArgs, KernelArgsPtr, AsyncInfoWrapper); } @@ -1411,12 +1406,21 @@ Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs, GenericKernelTy &GenericKernel = *reinterpret_cast(EntryPtr); - if (RecordReplay.isRecording()) + if (RecordReplay.isRecording()) { RecordReplay.saveImage(GenericKernel.getName(), GenericKernel.getImage()); + RecordReplay.saveKernelInput(GenericKernel.getName(), + GenericKernel.getImage()); + } auto Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, KernelArgs, AsyncInfoWrapper); + if (RecordReplay.isRecording()) + RecordReplay.saveKernelDescr(GenericKernel.getName(), ArgPtrs, ArgOffsets, + KernelArgs.NumArgs, KernelArgs.NumTeams[0], + KernelArgs.ThreadLimit[0], + KernelArgs.Tripcount); + // 'finalize' here to guarantee next record-replay actions are in-sync AsyncInfoWrapper.finalize(Err); @@ -1845,7 +1849,8 @@ int32_t __tgt_rtl_data_exchange(int32_t SrcDeviceId, void *SrcPtr, int32_t DstDeviceId, void *DstPtr, int64_t Size) { return __tgt_rtl_data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr, - Size, /* AsyncInfoPtr */ nullptr); + Size, + /* AsyncInfoPtr */ nullptr); } int32_t __tgt_rtl_data_exchange_async(int32_t SrcDeviceId, void *SrcPtr, diff --git a/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp index 93fc3e7853f8e..254be7db6e01a 100644 --- a/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp +++ 
b/openmp/libomptarget/tools/kernelreplay/llvm-omp-kernel-replay.cpp @@ -147,7 +147,7 @@ int main(int argc, char **argv) { uint8_t *recored_data = new uint8_t[DeviceMemoryMB.get()->getBufferSize()]; std::memcpy(recored_data, const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()), - DeviceMemorySizeJson.value() * sizeof(uint8_t)); + DeviceMemoryMB.get()->getBufferSize()); __tgt_target_kernel_replay( /* Loc */ nullptr, DeviceId, KernelEntry.addr, (char *)recored_data,