From 5069820a434e1dab3d2cb0519a6e40c50a846eae Mon Sep 17 00:00:00 2001 From: Ziran Zhang Date: Tue, 27 Aug 2024 14:08:53 +0800 Subject: [PATCH 1/5] [SYCLomatic] Support memory APIs migration with syclcompat Signed-off-by: Ziran Zhang --- clang/lib/DPCT/APINamesMemory.inc | 46 +++++++++++---------- clang/lib/DPCT/ASTTraversal.cpp | 50 +++++++++++++---------- clang/lib/DPCT/ASTTraversal.h | 6 +++ clang/lib/DPCT/CallExprRewriterMemory.cpp | 26 ++++++++++++ 4 files changed, 84 insertions(+), 44 deletions(-) diff --git a/clang/lib/DPCT/APINamesMemory.inc b/clang/lib/DPCT/APINamesMemory.inc index b8d0def6cd20..d1f123d37105 100644 --- a/clang/lib/DPCT/APINamesMemory.inc +++ b/clang/lib/DPCT/APINamesMemory.inc @@ -247,11 +247,13 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "cuMemcpyDtoD_v2", CALL(MapNames::getDpctNamespace() + "dpct_memcpy", ARG(0), ARG(1), ARG(2), ARG(MapNames::getDpctNamespace() + "automatic")))))) -ASSIGNABLE_FACTORY( - FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext, +ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( + HelperFeatureEnum::device_ext, ASSIGN_FACTORY_ENTRY( "cuMemAllocPitch_v2", DEREF(makeDerefArgCreatorWithCall(0)), - CAST(getDerefedType(0), CALL(MapNames::getDpctNamespace() + "dpct_malloc", DEREF(makeCallArgCreatorWithCall(1)), ARG(2), ARG(3)))))) + CAST(getDerefedType(0), + CALL(getMemoryHelperFunctionName("malloc"), + DEREF(makeCallArgCreatorWithCall(1)), ARG(2), ARG(3)))))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( makeCheckNot(CheckArgIsDefaultCudaStream(3)), @@ -426,16 +428,16 @@ ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( CheckDerefedTypeBeforeCast(0, "void *"), ASSIGN_FACTORY_ENTRY( "cudaMallocPitch", DEREF(makeDerefArgCreatorWithCall(0)), - CALL(MapNames::getDpctNamespace() + "dpct_malloc", + CALL(getMemoryHelperFunctionName("malloc"), DEREF(makeCallArgCreatorWithCall(1)), - makeCallArgCreatorWithCall(2), makeDerefArgCreatorWithCall(3))), + makeCallArgCreatorWithCall(2), + makeDerefArgCreatorWithCall(3))), ASSIGN_FACTORY_ENTRY( "cudaMallocPitch", DEREF(makeDerefArgCreatorWithCall(0)), - CAST(getDerefedType(0), - CALL(MapNames::getDpctNamespace() + "dpct_malloc", - DEREF(makeCallArgCreatorWithCall(1)), - makeCallArgCreatorWithCall(2), - makeCallArgCreatorWithCall(3))))))) + CAST(getDerefedType(0), CALL(getMemoryHelperFunctionName("malloc"), + DEREF(makeCallArgCreatorWithCall(1)), + makeCallArgCreatorWithCall(2), + makeCallArgCreatorWithCall(3))))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, @@ -621,78 +623,78 @@ ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD8_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset", + CALL(getMemoryHelperFunctionName("memset"), ARG(0), ARG(1), ARG(2))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD16_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset_d16", + CALL(getMemoryHelperFunctionName("memset_d16"), ARG(0), ARG(1), ARG(2))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD32_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset_d32", + CALL(getMemoryHelperFunctionName("memset_d32"), ARG(0), ARG(1), ARG(2))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD8Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset", + CALL(getMemoryHelperFunctionName("memset_async"), ARG(0), ARG(1), ARG(2), DEREF(makeCallArgCreatorWithCall(3)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD16Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset_d16", + CALL(getMemoryHelperFunctionName("memset_d16_async"), ARG(0), ARG(1), ARG(2), DEREF(makeCallArgCreatorWithCall(3)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD32Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset_d32", + CALL(getMemoryHelperFunctionName("memset_d32_async"), ARG(0), ARG(1), ARG(2), DEREF(makeCallArgCreatorWithCall(3)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D8_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset", + CALL(getMemoryHelperFunctionName("memset"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D16_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset_d16", + CALL(getMemoryHelperFunctionName("memset_d16"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D32_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset_d32", + CALL(getMemoryHelperFunctionName("memset_d32"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D8Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset", + CALL(getMemoryHelperFunctionName("memset_async"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4), DEREF(makeCallArgCreatorWithCall(5)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D16Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset_d16", + CALL(getMemoryHelperFunctionName("memset_d16_async"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4), DEREF(makeCallArgCreatorWithCall(5)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D32Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset_d32", + CALL(getMemoryHelperFunctionName("memset_d32_async"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4), DEREF(makeCallArgCreatorWithCall(5)))))) diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index 51746528205c..d461a0093d95 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -4503,14 +4503,15 @@ void BLASFunctionCallRule::runRule(const MatchFinder::MatchResult &Result) { " = " + MapNames::getClNamespace() + "malloc_shared(" + "1, " + DefaultQueue + ");" + getNL() + IndentStr; - SuffixInsertStr = SuffixInsertStr + getNL() + IndentStr + "int " + - ResultTempHost + " = (int)*" + ResultTempPtr + - ";" + getNL() + IndentStr + - MapNames::getDpctNamespace() + "dpct_memcpy(" + - ExprAnalysis::ref(CE->getArg(i)) + ", &" + - ResultTempHost + ", sizeof(int));" + getNL() + - IndentStr + MapNames::getClNamespace() + "free(" + - ResultTempPtr + ", " + DefaultQueue + ");"; + SuffixInsertStr = + SuffixInsertStr + getNL() + IndentStr + "int " + + ResultTempHost + " = (int)*" + ResultTempPtr + ";" + getNL() + + IndentStr + + MemoryMigrationRule::getMemoryHelperFunctionName("memcpy") + + "(" + ExprAnalysis::ref(CE->getArg(i)) + ", &" + + ResultTempHost + ", sizeof(int));" + getNL() + IndentStr + + MapNames::getClNamespace() + "free(" + ResultTempPtr + ", " + + DefaultQueue + ");"; CurrentArgumentRepl = ResultTempPtr; } else { CurrentArgumentRepl = ExprAnalysis::ref(CE->getArg(i)); @@ -4640,14 +4641,15 @@ void BLASFunctionCallRule::runRule(const MatchFinder::MatchResult &Result) { " = " + MapNames::getClNamespace() + "malloc_shared(" + "1, " + DefaultQueue + ");" + getNL() + IndentStr; - SuffixInsertStr = SuffixInsertStr + getNL() + IndentStr + "int " + - ResultTempHost + " = (int)*" + ResultTempPtr + - ";" + getNL() + IndentStr + - MapNames::getDpctNamespace() + "dpct_memcpy(" + - ExprAnalysis::ref(CE->getArg(i)) + ", &" + - ResultTempHost + ", sizeof(int));" + getNL() + - IndentStr + MapNames::getClNamespace() + "free(" + - ResultTempPtr + ", " + DefaultQueue + ");"; + SuffixInsertStr = + SuffixInsertStr + getNL() + IndentStr + "int " + + ResultTempHost + " = (int)*" + ResultTempPtr + ";" + getNL() + + IndentStr + + MemoryMigrationRule::getMemoryHelperFunctionName("memcpy") + + "(" + ExprAnalysis::ref(CE->getArg(i)) + ", &" + + ResultTempHost + ", sizeof(int));" + getNL() + IndentStr + + MapNames::getClNamespace() + "free(" + ResultTempPtr + ", " + + DefaultQueue + ");"; CurrentArgumentRepl = ResultTempPtr; } else if (ReplInfo.BufferTypeInfo[IndexTemp] == "std::complex" || @@ -5704,7 +5706,7 @@ void SOLVERFunctionCallRule::runRule(const MatchFinder::MatchResult &Result) { if (HasDeviceAttr) { report(CE->getBeginLoc(), Diagnostics::FUNCTION_CALL_IN_DEVICE, false, MapNames::ITFName.at(FuncName), - MapNames::getDpctNamespace() + "dpct_memcpy"); + MemoryMigrationRule::getMemoryHelperFunctionName("memcpy")); return; } @@ -10162,7 +10164,7 @@ void MemoryMigrationRule::mallocMigration( requestFeature(HelperFeatureEnum::device_ext); emplaceTransformation(new InsertBeforeStmt(C, OS.str())); emplaceTransformation( - new ReplaceCalleeName(C, MapNames::getDpctNamespace() + "dpct_malloc")); + new ReplaceCalleeName(C, MemoryMigrationRule::getMemoryHelperFunctionName("malloc"))); emplaceTransformation(removeArg(C, 0, *Result.SourceManager)); std::ostringstream OS2; printDerefOp(OS2, C->getArg(1)); @@ -10343,10 +10345,10 @@ void MemoryMigrationRule::memcpyMigration( if (ReplaceStr.empty()) { if (IsAsync) { - ReplaceStr = MapNames::getDpctNamespace() + "async_dpct_memcpy"; + ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName("memcpy_async"); requestFeature(HelperFeatureEnum::device_ext); } else { - ReplaceStr = MapNames::getDpctNamespace() + "dpct_memcpy"; + ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName("memcpy"); requestFeature(HelperFeatureEnum::device_ext); } } @@ -10757,10 +10759,10 @@ void MemoryMigrationRule::memsetMigration( bool IsAsync = NameRef.ends_with("Async"); if (IsAsync) { NameRef = NameRef.drop_back(5 /* len of "Async" */); - ReplaceStr = MapNames::getDpctNamespace() + "async_dpct_memset"; + ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName("memset_async"); requestFeature(HelperFeatureEnum::device_ext); } else { - ReplaceStr = MapNames::getDpctNamespace() + "dpct_memset"; + ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName("memset"); requestFeature(HelperFeatureEnum::device_ext); } @@ -11456,6 +11458,10 @@ void MemoryMigrationRule::aggregate3DVectorClassCtor( } void MemoryMigrationRule::handleDirection(const CallExpr *C, unsigned i) { + if (DpctGlobalInfo::useSYCLCompat()) { + emplaceTransformation(removeArg(C, i, DpctGlobalInfo::getSourceManager())); + return; + } if (C->getNumArgs() > i && !C->getArg(i)->isDefaultArgument()) { if (auto DRE = dyn_cast(C->getArg(i))) { if (auto Enum = dyn_cast(DRE->getDecl())) { diff --git a/clang/lib/DPCT/ASTTraversal.h b/clang/lib/DPCT/ASTTraversal.h index 6449a94bc301..cd39e793a764 100644 --- a/clang/lib/DPCT/ASTTraversal.h +++ b/clang/lib/DPCT/ASTTraversal.h @@ -1344,6 +1344,12 @@ class MemoryMigrationRule : public NamedMigrationRule { void registerMatcher(ast_matchers::MatchFinder &MF) override; void runRule(const ast_matchers::MatchFinder::MatchResult &Result); + /// Get helper function name with namespace which has 'dpct_' in dpct helper + /// functions and w/o in syclcompat. + /// If has "_async" suffix, the name in dpct helper function will have + /// 'async_' prefix and remove the suffix. + static std::string getMemoryHelperFunctionName(StringRef RawName); + private: void mallocMigration(const ast_matchers::MatchFinder::MatchResult &Result, const CallExpr *C, diff --git a/clang/lib/DPCT/CallExprRewriterMemory.cpp b/clang/lib/DPCT/CallExprRewriterMemory.cpp index 4bcd87a72fe6..502cfce0e996 100644 --- a/clang/lib/DPCT/CallExprRewriterMemory.cpp +++ b/clang/lib/DPCT/CallExprRewriterMemory.cpp @@ -12,6 +12,32 @@ namespace clang { namespace dpct { +/// Get helper function name with namespace which has 'dpct_' in dpct helper +/// functions and w/o in syclcompat. +/// If has "_async" suffix, the name in dpct helper function will have 'async_' +/// prefix and remove the suffix. +std::string getMemoryHelperFunctionName(StringRef RawName) { + const static std::string AsyncSuffix = "_async"; + const static std::string AsyncPrefix = "async_"; + + std::string Result; + llvm::raw_string_ostream OS(Result); + OS << MapNames::getDpctNamespace(); + if (!DpctGlobalInfo::useSYCLCompat()) { + if (RawName.ends_with(AsyncSuffix)) { + RawName = RawName.drop_back(AsyncSuffix.length()); + OS << AsyncPrefix; + } + OS << "dpct_"; + } + OS << RawName; + return Result; +} + +std::string MemoryMigrationRule::getMemoryHelperFunctionName(StringRef Name) { + return dpct::getMemoryHelperFunctionName(Name); +} + // clang-format off void CallExprRewriterFactoryBase::initRewriterMapMemory() { RewriterMap->merge( From a7394e148554a1d83b04c83da424bd0cf8e3a715 Mon Sep 17 00:00:00 2001 From: Ziran Zhang Date: Wed, 28 Aug 2024 16:42:03 +0800 Subject: [PATCH 2/5] Add lit tests and fix some issues --- clang/lib/DPCT/APINamesMemory.inc | 10 +- clang/lib/DPCT/ASTTraversal.cpp | 19 +- .../lib/DPCT/Rewriters/RewriterSYCLcompat.cpp | 17 + clang/test/dpct/driver-mem-syclcompat.cu | 353 ++++++ clang/test/dpct/usm-syclcompat.cu | 1092 +++++++++++++++++ 5 files changed, 1481 insertions(+), 10 deletions(-) create mode 100644 clang/test/dpct/driver-mem-syclcompat.cu create mode 100644 clang/test/dpct/usm-syclcompat.cu diff --git a/clang/lib/DPCT/APINamesMemory.inc b/clang/lib/DPCT/APINamesMemory.inc index d1f123d37105..ed5576aee733 100644 --- a/clang/lib/DPCT/APINamesMemory.inc +++ b/clang/lib/DPCT/APINamesMemory.inc @@ -266,7 +266,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "get_device", ARG(2)), false, DpctGlobalInfo::getDeviceQueueName()), - false, "prefetch", ARG(0), ARG(1))))) + DpctGlobalInfo::useSYCLCompat(), "prefetch", ARG(0), ARG(1))))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( checkArgSpelling(3, "CU_DEVICE_CPU"), @@ -281,7 +281,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "cpu_device"), false, DpctGlobalInfo::getDeviceQueueName()), - false, "mem_advise", ARG(0), ARG(1), ARG("0"))), + DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG("0"))), Diagnostics::DEFAULT_MEM_ADVICE, ARG(" and was set to 0")), FEATURE_REQUEST_FACTORY( @@ -290,7 +290,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "cuMemAdvise", MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "cpu_device"), false, DpctGlobalInfo::getDeviceQueueName()), - false, "mem_advise", ARG(0), ARG(1), ARG(2)))), + DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG(2)))), CONDITIONAL_FACTORY_ENTRY( checkIsArgIntegerLiteral(2), @@ -304,7 +304,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "get_device", ARG(3)), false, DpctGlobalInfo::getDeviceQueueName()), - false, "mem_advise", ARG(0), ARG(1), ARG("0"))), + DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG("0"))), Diagnostics::DEFAULT_MEM_ADVICE, ARG(" and was set to 0")), FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, @@ -313,7 +313,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "get_device", ARG(3)), false, DpctGlobalInfo::getDeviceQueueName()), - false, "mem_advise", ARG(0), ARG(1), ARG(2)))))) + DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG(2)))))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( checkIsUSM(), diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index d461a0093d95..8d728db89e25 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -10367,6 +10367,15 @@ void MemoryMigrationRule::memcpyMigration( void MemoryMigrationRule::arrayMigration( const ast_matchers::MatchFinder::MatchResult &Result, const CallExpr *C, const UnresolvedLookupExpr *ULExpr, bool IsAssigned) { + if (DpctGlobalInfo::useSYCLCompat()) { + ExprAnalysis EA; + if (ULExpr) + EA.analyze(ULExpr); + else + EA.analyze(C); + emplaceTransformation(EA.getReplacement()); + return; + } std::string Name; if (ULExpr) { Name = ULExpr->getName().getAsString(); @@ -13118,11 +13127,11 @@ void TextureRule::registerMatcher(MatchFinder &MF) { "cudaTextureDesc", "cudaResourceDesc", "cudaResourceType", "cudaTextureAddressMode", "cudaTextureFilterMode", "cudaArray", "cudaArray_t", "CUarray_st", "CUarray", "CUarray_format", - "CUarray_format_enum", "CUdeviceptr", "CUresourcetype", - "CUresourcetype_enum", "CUaddress_mode", "CUaddress_mode_enum", - "CUfilter_mode", "CUfilter_mode_enum", "CUDA_RESOURCE_DESC", - "CUDA_TEXTURE_DESC", "CUtexref", "textureReference", - "cudaMipmappedArray", "cudaMipmappedArray_t")))))) + "CUarray_format_enum", "CUresourcetype", "CUresourcetype_enum", + "CUaddress_mode", "CUaddress_mode_enum", "CUfilter_mode", + "CUfilter_mode_enum", "CUDA_RESOURCE_DESC", "CUDA_TEXTURE_DESC", + "CUtexref", "textureReference", "cudaMipmappedArray", + "cudaMipmappedArray_t")))))) .bind("texType"), this); diff --git a/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp b/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp index 53255c983a37..7e40e38da768 100644 --- a/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp +++ b/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp @@ -40,6 +40,23 @@ void initRewriterMapSYCLcompatUnsupport( RewriterMap.insert({ #include "../APINamesGraph.inc" #include "../APINamesTexture.inc" +SYCLCOMPAT_UNSUPPORT("cudaMemcpy2DArrayToArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpy2DFromArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpy2DFromArrayAsync") +SYCLCOMPAT_UNSUPPORT("cudaMemcpy2DToArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpy2DToArrayAsync") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyArrayToArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyToArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyToArrayAsync") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyFromArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyFromArrayAsync") +SYCLCOMPAT_UNSUPPORT("cuMemcpyAtoH_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyHtoA_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyAtoHAsync_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyHtoAAsync_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyAtoD_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyDtoA_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyAtoA_v2") }); } diff --git a/clang/test/dpct/driver-mem-syclcompat.cu b/clang/test/dpct/driver-mem-syclcompat.cu new file mode 100644 index 000000000000..410db88affab --- /dev/null +++ b/clang/test/dpct/driver-mem-syclcompat.cu @@ -0,0 +1,353 @@ +// RUN: dpct --format-range=none -use-syclcompat -out-root %T/driver-mem-syclcompat %s --cuda-include-path="%cuda-path/include" +// RUN: FileCheck --match-full-lines --input-file %T/driver-mem-syclcompat/driver-mem-syclcompat.dp.cpp %s +// RUN: %if build_lit %{icpx -c -fsycl %T/driver-mem-syclcompat/driver-mem-syclcompat.dp.cpp -o %T/driver-mem/driver-mem-syclcompat.dp.o %} + +#include +#include +#include + +#define CALL(x) x + +void cuCheckError(CUresult err) { +} + +int main(){ + size_t result1, result2; + int size = 32; + size_t count = 32; + float* f_A; + CUresult r; + // CHECK: f_A = (float *)sycl::malloc_host(size, q_ct1); + cuMemHostAlloc((void **)&f_A, size, CU_MEMHOSTALLOC_DEVICEMAP); + + + CUdeviceptr p1; + [&p1]() { + //CHECK:p1 = 0; + p1 = 0; + }(); + + CUdeviceptr p2; + [&]() { + //CHECK:p2 = 0; + p2 = 0; + }(); + + // CHECK: syclcompat::device_ptr f_D = 0; + CUdeviceptr f_D = 0; + // CHECK: syclcompat::device_ptr f_D2 = 0; + CUdeviceptr f_D2 = 0; + // CHECK: int c1, c2; + CUcontext c1, c2; + // CHECK: f_D = (syclcompat::device_ptr)sycl::malloc_device(size, q_ct1); + cuMemAlloc(&f_D, size); + + // CHECK: syclcompat::queue_ptr stream; + CUstream stream; + // CHECK: stream->memcpy(f_D, f_A, size); + cuMemcpyHtoDAsync(f_D, f_A, size, stream); + // CHECK: q_ct1.memcpy(f_D, f_A, size); + cuMemcpyHtoDAsync(f_D, f_A, size, 0); + // CHECK: CALL(q_ct1.memcpy(f_D, f_A, size).wait()); + CALL(cuMemcpyHtoD(f_D, f_A, size)); + + // CHECK: stream->memcpy(f_A, f_D, size); + cuMemcpyDtoHAsync(f_A, f_D, size, stream); + // CHECK: q_ct1.memcpy(f_A, f_D, size); + cuMemcpyDtoHAsync(f_A, f_D, size, 0); + // CHECK: q_ct1.memcpy(f_A, f_D, size).wait(); + cuMemcpyDtoH(f_A, f_D, size); + + // CHECK: stream->memcpy(f_D, f_D2, size); + cuMemcpyDtoDAsync(f_D, f_D2, size, stream); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(stream->memcpy(f_D, f_D2, size)); + r = cuMemcpyDtoDAsync(f_D, f_D2, size, stream); + + // CHECK: q_ct1.memcpy(f_D, f_D2, size); + cuMemcpyDtoDAsync(f_D, f_D2, size, 0); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(f_D, f_D2, size)); + r = cuMemcpyDtoDAsync(f_D, f_D2, size, 0); + + // CHECK: q_ct1.memcpy(f_D, f_D2, size).wait(); + cuMemcpyDtoD(f_D, f_D2, size); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(f_D, f_D2, size).wait()); + r = cuMemcpyDtoD(f_D, f_D2, size); + + // CHECK: q_ct1.memcpy(f_D, f_D2, size).wait(); + cuMemcpy(f_D, f_D2, size); + // CHECK: CALL(q_ct1.memcpy(f_D, f_D2, size).wait()); + CALL(cuMemcpy(f_D, f_D2, size)); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(f_D, f_D2, size).wait()); + r = cuMemcpy(f_D, f_D2, size); + + // CHECK: stream->memcpy(f_D, f_D2, size); + cuMemcpyAsync(f_D, f_D2, size, stream); + // CHECK: CALL(stream->memcpy(f_D, f_D2, size)); + CALL(cuMemcpyAsync(f_D, f_D2, size, stream)); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(stream->memcpy(f_D, f_D2, size)); + r = cuMemcpyAsync(f_D, f_D2, size, stream); + + // CHECK: q_ct1.memcpy(f_D, f_D2, size); + cuMemcpyAsync(f_D, f_D2, size, 0); + // CHECK: CALL(q_ct1.memcpy(f_D, f_D2, size)); + CALL(cuMemcpyAsync(f_D, f_D2, size, 0)); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(f_D, f_D2, size)); + r = cuMemcpyAsync(f_D, f_D2, size, 0); + + // CHECK: syclcompat::memcpy(f_D, c1, f_D2, c2, size); + cuMemcpyPeer(f_D, c1, f_D2, c2, size); + // CHECK: /* + // CHECK-NEXT: DPCT1124:{{[0-9]+}}: cuMemcpyPeerAsync is migrated to asynchronous memcpy API. While the origin API might be synchronous, it depends on the type of operand memory, so you may need to call wait() on event return by memcpy API to ensure synchronization behavior. + // CHECK-NEXT: */ + // CHECK-NEXT: syclcompat::memcpy_async(f_D, c1, f_D2, c2, size, *stream); + cuMemcpyPeerAsync(f_D, c1, f_D2, c2, size, stream); + + unsigned int v32 = 50000; + unsigned short v16 = 20000; + unsigned char v8 = (unsigned char) 200; + //CHECK: syclcompat::memset_d32(f_D, v32, size); + //CHECK-NEXT: syclcompat::memset_d16(f_D, v16, size * 2); + //CHECK-NEXT: syclcompat::memset(f_D, v8, size * 4); + //CHECK-NEXT: syclcompat::memset_d32_async(f_D, v32, size, *stream); + //CHECK-NEXT: syclcompat::memset_d16_async(f_D, v16, size * 2, *stream); + //CHECK-NEXT: syclcompat::memset_async(f_D, v8, size * 4, *stream); + //CHECK-NEXT: syclcompat::memset_d32(f_D, 1, v32, 4, 6); + //CHECK-NEXT: syclcompat::memset_d16(f_D, 1, v16, 4 * 2, 6); + //CHECK-NEXT: syclcompat::memset(f_D, 1, v8, 4 * 4, 6); + //CHECK-NEXT: syclcompat::memset_d32_async(f_D, 1, v32, 4, 6, *stream); + //CHECK-NEXT: syclcompat::memset_d16_async(f_D, 1, v16, 4 * 2, 6, *stream); + //CHECK-NEXT: syclcompat::memset_async(f_D, 1, v8, 4 * 4, 6, *stream); + cuMemsetD32(f_D, v32, size); + cuMemsetD16(f_D, v16, size * 2); + cuMemsetD8(f_D, v8, size * 4); + cuMemsetD32Async(f_D, v32, size, stream); + cuMemsetD16Async(f_D, v16, size * 2, stream); + cuMemsetD8Async(f_D, v8, size * 4, stream); + cuMemsetD2D32(f_D, 1, v32, 4, 6); + cuMemsetD2D16(f_D, 1, v16, 4 * 2, 6); + cuMemsetD2D8(f_D, 1, v8, 4 * 4, 6); + cuMemsetD2D32Async(f_D, 1, v32, 4, 6, stream); + cuMemsetD2D16Async(f_D, 1, v16, 4 * 2, 6, stream); + cuMemsetD2D8Async(f_D, 1, v8, 4 * 4, 6, stream); + + // CHECK: syclcompat::memcpy_parameter cpy; + // CHECK-NEXT: cpy.to.pitched.set_data_ptr(f_A); + // CHECK-NEXT: cpy.to.pitched.set_pitch(20); + // CHECK-NEXT: cpy.to.pos[1] = 10; + // CHECK-NEXT: cpy.to.pos[0] = 15; + // CHECK-EMPTY: + // CHECK-NEXT: cpy.from.pitched.set_data_ptr(f_D); + // CHECK-NEXT: cpy.from.pitched.set_pitch(20); + // CHECK-NEXT: cpy.from.pos[1] = 10; + // CHECK-NEXT: cpy.from.pos[0] = 15; + // CHECK-EMPTY: + // CHECK-NEXT: cpy.size[0] = 4; + // CHECK-NEXT: cpy.size[1] = 7; + CUDA_MEMCPY2D cpy; + cpy.dstMemoryType = CU_MEMORYTYPE_HOST; + cpy.dstHost = f_A; + cpy.dstPitch = 20; + cpy.dstY = 10; + cpy.dstXInBytes = 15; + + cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE; + cpy.srcDevice = f_D; + cpy.srcPitch = 20; + cpy.srcY = 10; + cpy.srcXInBytes = 15; + + cpy.WidthInBytes = 4; + cpy.Height = 7; + + // CHECK: syclcompat::memcpy(cpy); + cuMemcpy2D(&cpy); + // CHECK: syclcompat::memcpy_async(cpy, *stream); + cuMemcpy2DAsync(&cpy, stream); + + CUdeviceptr devicePtr; + + CUresult cu_err; + + CUdeviceptr cuDevPtr; + + CUdevice cudevice; + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: int advise = 0; + CUmem_advise advise = CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION; + + // CHECK: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise); + cuMemAdvise(devicePtr, count, advise, cudevice); + + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise))); + cuCheckError(cuMemAdvise(devicePtr, count, advise, cudevice)); + + // CHECK: cu_err = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise)); + cu_err = cuMemAdvise(devicePtr, count, advise, cudevice); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0); + cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + cuCheckError(cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice)); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + cuCheckError(cuMemAdvise(devicePtr, count, (CUmem_advise)1, cudevice)); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + cuCheckError(cuMemAdvise(devicePtr, count, CUmem_advise(1), cudevice)); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + cuCheckError(cuMemAdvise(devicePtr, count, static_cast(1), cudevice)); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: cu_err = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0)); + cu_err = cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0); + cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: syclcompat::cpu_device().default_queue()->mem_advise(devicePtr, count, 0); + cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, CU_DEVICE_CPU); + + + CUdeviceptr devPtr; + CUresult curesult; + // CHECK: stream->prefetch(devPtr, 100); + cuMemPrefetchAsync (devPtr, 100, cudevice, stream); + // CHECK: (*&stream)->prefetch(devPtr, 100); + cuMemPrefetchAsync (devPtr, 100, cudevice, *&stream); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, NULL); + // CHECK: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100); + cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + cuCheckError(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault)); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + cuCheckError(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy)); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + cuCheckError(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread)); + + // CHECK: syclcompat::memcpy_parameter cpy2; + // CHECK-EMPTY: + // CHECK-NEXT: /* + // CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "CUarray" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK-NEXT: */ + // CHECK-NEXT: CUarray ca; + // CHECK-NEXT: cpy2.to.image = ca; + // CHECK-NEXT: cpy2.to.pitched.set_pitch(5); + // CHECK-NEXT: cpy2.to.pitched.set_y(4); + // CHECK-NEXT: cpy2.to.pos[1] = 3; + // CHECK-NEXT: cpy2.to.pos[2] = 2; + // CHECK-NEXT: cpy2.to.pos[0] = 1; + // CHECK-EMPTY: + // CHECK-NEXT: cpy2.from.pitched.set_data_ptr(f_A); + // CHECK-NEXT: cpy2.from.pitched.set_pitch(5); + // CHECK-NEXT: cpy2.from.pitched.set_y(4); + // CHECK-NEXT: cpy2.from.pos[1] = 3; + // CHECK-NEXT: cpy2.from.pos[2] = 2; + // CHECK-NEXT: cpy2.from.pos[0] = 1; + // CHECK-EMPTY: + // CHECK-NEXT: cpy2.size[0] = 3; + // CHECK-NEXT: cpy2.size[1] = 2; + // CHECK-NEXT: cpy2.size[2] = 1; + CUDA_MEMCPY3D cpy2; + + CUarray ca; + cpy2.dstMemoryType = CU_MEMORYTYPE_ARRAY; + cpy2.dstArray = ca; + cpy2.dstPitch = 5; + cpy2.dstHeight = 4; + cpy2.dstY = 3; + cpy2.dstZ = 2; + cpy2.dstXInBytes = 1; + cpy2.dstLOD = 0; + + cpy2.srcMemoryType = CU_MEMORYTYPE_HOST; + cpy2.srcHost = f_A; + cpy2.srcPitch = 5; + cpy2.srcHeight = 4; + cpy2.srcY = 3; + cpy2.srcZ = 2; + cpy2.srcXInBytes = 1; + cpy2.srcLOD = 0; + + cpy2.WidthInBytes = 3; + cpy2.Height = 2; + cpy2.Depth = 1; + + // CHECK: syclcompat::memcpy(cpy2); + cuMemcpy3D(&cpy2); + + CUstream cs; + // CHECK: syclcompat::memcpy_async(cpy2, *cs); + cuMemcpy3DAsync(&cpy2, cs); + + float *h_A = (float *)malloc(100); + // CHECK:sycl::free(h_A, q_ct1); + cuMemFreeHost(h_A); + // CHECK:sycl::free(f_D, q_ct1); + cuMemFree(f_D); + + unsigned int flags; + int host; + + + // CHECK: flags = 0; + cuMemHostGetFlags(&flags, &host); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(flags = 0)); + cuCheckError(cuMemHostGetFlags(&flags, &host)); + + // CHECK: /* + // CHECK-NEXT: DPCT1026:{{[0-9]+}}: The call to cuMemHostRegister was removed because SYCL currently does not support registering of existing host memory for use by device. Use USM to allocate memory for use by host and device. + // CHECK-NEXT: */ + cuMemHostRegister(h_A, count, flags); + // CHECK: /* + // CHECK-NEXT: DPCT1027:{{[0-9]+}}: The call to cuMemHostRegister was replaced with 0 because SYCL currently does not support registering of existing host memory for use by device. Use USM to allocate memory for use by host and device. + // CHECK-NEXT: */ + // CHECK-NEXT: cuCheckError(0); + cuCheckError(cuMemHostRegister(h_A, count, flags)); + + + // CHECK: /* + // CHECK-NEXT: DPCT1026:{{[0-9]+}}: The call to cuMemHostUnregister was removed because SYCL currently does not support registering of existing host memory for use by device. Use USM to allocate memory for use by host and device. + // CHECK-NEXT: */ + cuMemHostUnregister(h_A); + + // CHECK: /* + // CHECK-NEXT: DPCT1027:{{[0-9]+}}: The call to cuMemHostUnregister was replaced with 0 because SYCL currently does not support registering of existing host memory for use by device. Use USM to allocate memory for use by host and device. + // CHECK-NEXT: */ + // CHECK-NEXT:cuCheckError(0); + cuCheckError(cuMemHostUnregister(h_A)); + return 0; +} diff --git a/clang/test/dpct/usm-syclcompat.cu b/clang/test/dpct/usm-syclcompat.cu new file mode 100644 index 000000000000..58490f9c2733 --- /dev/null +++ b/clang/test/dpct/usm-syclcompat.cu @@ -0,0 +1,1092 @@ +// FIXME +// UNSUPPORTED: system-windows +// RUN: dpct --format-range=none --use-syclcompat --usm-level=restricted -out-root %T/usm-syclcompat %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only +// RUN: FileCheck --match-full-lines --input-file %T/usm-syclcompat/usm-syclcompat.dp.cpp %s +// RUN: %if build_lit %{icpx -c -fsycl -DBUILD_TEST %T/usm-syclcompat/usm-syclcompat.dp.cpp -o %T/usm-syclcompat/usm-syclcompat.dp.o %} + +// CHECK: #include +// CHECK-NEXT: #include +#include +#include +#include +#include +#include + +#define MY_SAFE_CALL(CALL) do { \ + int Error = CALL; \ +} while (0) + +__constant__ float constData[123 * 4]; + +int foo_b(int a){ + return 0; +} + +void foo() { + // CHECK: syclcompat::device_ext &dev_ct1 = syclcompat::get_current_device(); + // CHECK-NEXT: sycl::queue &q_ct1 = *dev_ct1.default_queue(); + size_t size = 1234567 * sizeof(float); + float *h_A = (float *)malloc(size); + float *d_A = NULL; + int errorCode; + + cudaPitchedPtr p_A; + cudaExtent e; + cudaMemcpy3DParms parms; + cudaStream_t stream; + + /// malloc + // CHECK: d_A = (float *)sycl::malloc_device(size, q_ct1); + cudaMalloc((void **)&d_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(d_A = (float *)sycl::malloc_device(size, q_ct1)); + errorCode = cudaMalloc((void **)&d_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(d_A = (float *)sycl::malloc_device(size, q_ct1))); + MY_SAFE_CALL(cudaMalloc((void **)&d_A, size)); + + // CHECK: d_A = (float *)sycl::malloc_device(sizeof(sycl::double2) + size, q_ct1); + // CHECK-NEXT: d_A = (float *)sycl::malloc_device(sizeof(sycl::uchar4) + size, q_ct1); + // CHECK-NEXT: d_A = (float *)sycl::malloc_device(sizeof(d_A[0]), q_ct1); + cudaMalloc((void **)&d_A, sizeof(double2) + size); + cudaMalloc((void **)&d_A, sizeof(uchar4) + size); + cudaMalloc((void **)&d_A, sizeof(d_A[0])); + + // CHECK: d_A = (float *)syclcompat::malloc(size, size, size); + cudaMallocPitch((void **)&d_A, &size, size, size); + // CHECK: p_A = syclcompat::malloc(e); + cudaMalloc3D(&p_A, e); + + // CHECK: h_A = (float *)sycl::malloc_host(size, q_ct1); + cudaHostAlloc((void **)&h_A, size, cudaHostAllocDefault); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1)); + errorCode = cudaHostAlloc((void **)&h_A, size, cudaHostAllocDefault); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1))); + MY_SAFE_CALL(cudaHostAlloc((void **)&h_A, size, cudaHostAllocDefault)); + + // CHECK: /* + // CHECK-NEXT: DPCT1048:{{[0-9]+}}: The original value cudaHostAllocDefault is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK-NEXT: */ + // CHECK-NEXT: h_A = (float *)sycl::malloc_host(sizeof(sycl::double2) - size, q_ct1); + cudaHostAlloc((void **)&h_A, sizeof(double2) - size, cudaHostAllocDefault); + // CHECK: /* + // CHECK-NEXT: DPCT1048:{{[0-9]+}}: The original value cudaHostAllocDefault is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK-NEXT: */ + // CHECK-NEXT: h_A = (float *)sycl::malloc_host(sizeof(sycl::uchar4) - size, q_ct1); + cudaHostAlloc((void **)&h_A, sizeof(uchar4) - size, cudaHostAllocDefault); + + void *h_B = h_A; + // CHECK: h_A = (float *)sycl::malloc_host(size, q_ct1); + cudaMallocHost((void **)&h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1)); + errorCode = cudaMallocHost((void **)&h_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1))); + MY_SAFE_CALL(cudaMallocHost((void **)&h_A, size)); + + // CHECK: h_A = (float *)sycl::malloc_host(size, q_ct1); + cuMemAllocHost((void **)&h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1)); + errorCode = cuMemAllocHost((void **)&h_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1))); + MY_SAFE_CALL(cuMemAllocHost((void **)&h_A, size)); + + // CHECK: h_A = (float *)sycl::malloc_host(sizeof(sycl::double2) * size, q_ct1); + // CHECK-NEXT: h_A = (float *)sycl::malloc_host(sizeof(sycl::uchar4) * size, q_ct1); + cudaMallocHost((void **)&h_A, sizeof(double2) * size); + cudaMallocHost((void **)&h_A, sizeof(uchar4) * size); + + // CHECK: h_A = (float *)sycl::malloc_host(size, q_ct1); + cudaMallocHost(&h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1)); + errorCode = cudaMallocHost(&h_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1))); + MY_SAFE_CALL(cudaMallocHost(&h_A, size)); + + // CHECK: h_A = (float *)sycl::malloc_host(sizeof(sycl::double2) / size, q_ct1); + // CHECK-NEXT: h_A = (float *)sycl::malloc_host(sizeof(sycl::uchar4) / size, q_ct1); + cudaMallocHost(&h_A, sizeof(double2) / size); + cudaMallocHost(&h_A, sizeof(uchar4) / size); + + float* buffer[2]; +#define SIZE_1 (128 * 1024 * 1024) + // CHECK: *buffer = sycl::malloc_host(SIZE_1, q_ct1); + // CHECK-NEXT: *(buffer + 1) = sycl::malloc_host(SIZE_1, q_ct1); + cudaMallocHost((void**)buffer, SIZE_1 * sizeof(float)); + cudaMallocHost((void**)(buffer + 1), SIZE_1 * sizeof(float)); +#undef SIZE_1 + + // CHECK: d_A = (float *)sycl::malloc_shared(size, q_ct1); + cudaMallocManaged((void **)&d_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(d_A = (float *)sycl::malloc_shared(size, q_ct1)); + errorCode = cudaMallocManaged((void **)&d_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(d_A = (float *)sycl::malloc_shared(size, q_ct1))); + MY_SAFE_CALL(cudaMallocManaged((void **)&d_A, size)); + + // CHECK: d_A = (float *)sycl::malloc_shared(sizeof(sycl::double2) + size + sizeof(sycl::uchar4), q_ct1); + // CHECK-NEXT: d_A = (float *)sycl::malloc_shared(sizeof(sycl::double2) * size * sizeof(sycl::uchar4), q_ct1); + cudaMallocManaged((void **)&d_A, sizeof(double2) + size + sizeof(uchar4)); + cudaMallocManaged((void **)&d_A, sizeof(double2) * size * sizeof(uchar4)); + + CUdeviceptr* D_ptr; + // CHECK: *D_ptr = (syclcompat::device_ptr)sycl::malloc_shared(size, q_ct1); + cuMemAllocManaged(D_ptr, size, CU_MEM_ATTACH_HOST); + + /// memcpy + + // CHECK: q_ct1.memcpy(d_A, h_A, size); + cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + errorCode = cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + MY_SAFE_CALL(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice)); +#define MACRO_A(x) size +#define MACRO_A2(x) MACRO_A(x) +#define MACRO_B size +#define MACOR_C(x) cudaMemcpyDeviceToHost +#define MY_SAFE_CALL2(x) MY_SAFE_CALL(x) + //CHECK: MY_SAFE_CALL2(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + MY_SAFE_CALL2(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice)); + //CHECK: MY_SAFE_CALL2(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, MACRO_B))); + MY_SAFE_CALL2(cudaMemcpy(d_A, h_A, MACRO_B, cudaMemcpyDeviceToHost)); + //CHECK: MY_SAFE_CALL2(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, MACRO_A2(1)))); + MY_SAFE_CALL2(cudaMemcpy(d_A, h_A, MACRO_A2(1), MACOR_C(1))); + //CHECK: MY_SAFE_CALL2(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, foo_b(1)))); + MY_SAFE_CALL2(cudaMemcpy(d_A, h_A, foo_b(1), MACOR_C(1))); + +#define SIZE 100 + // CHECK: q_ct1.memcpy( d_A, h_A, sizeof(double)*SIZE*SIZE ).wait(); + cudaMemcpy( d_A, h_A, sizeof(double)*SIZE*SIZE, cudaMemcpyDeviceToHost ); + + /// memcpy async + + // CHECK: q_ct1.memcpy(d_A, h_A, size); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice)); + + // CHECK: q_ct1.memcpy(d_A, h_A, size); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, 0)); + + // CHECK: stream->memcpy(d_A, h_A, size); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memcpy(d_A, h_A, size)); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memcpy(d_A, h_A, size))); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, stream)); + + // CHECK: syclcompat::memcpy(d_A, size, h_A, size, size, size); + cudaMemcpy2D(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice); + // CHECK: syclcompat::memcpy(h_A, size, d_A, size, size, size); + cudaMemcpy2D(h_A, size, d_A, size, size, size, cudaMemcpyDeviceToHost); + + // CHECK: syclcompat::memcpy(parms); + cudaMemcpy3D(&parms); +#ifndef BUILD_TEST + struct cudaMemcpy3DParms *parms_pointer; + // Followed call can't be processed. + cudaMemcpy3D(parms_pointer); +#endif + + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice); + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, 0); + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size, *stream); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, stream); + + // CHECK: syclcompat::memcpy_async(h_A, size, d_A, size, size, size); + cudaMemcpy2DAsync(h_A, size, d_A, size, size, size, cudaMemcpyDeviceToHost); + // CHECK: syclcompat::memcpy_async(h_A, size, d_A, size, size, size); + cudaMemcpy2DAsync(h_A, size, d_A, size, size, size, cudaMemcpyDeviceToHost, 0); + // CHECK: syclcompat::memcpy_async(h_A, size, d_A, size, size, size, *stream); + cudaMemcpy2DAsync(h_A, size, d_A, size, size, size, cudaMemcpyDeviceToHost, stream); + + // CHECK: syclcompat::memcpy_async(parms); + cudaMemcpy3DAsync(&parms); + // CHECK: syclcompat::memcpy_async(parms); + cudaMemcpy3DAsync(&parms, 0); + // CHECK: syclcompat::memcpy_async(parms, *stream); + cudaMemcpy3DAsync(&parms, stream); + /// memcpy from symbol + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, constData, size, 1); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, "constData", size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbol(h_A, constData, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbol(h_A, constData, size, 1)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, "constData", size, 1, cudaMemcpyDeviceToHost); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size).wait())); + MY_SAFE_CALL(cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost)); + + /// memcpy from symbol async + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbolAsync(h_A, "constData", size, 1, cudaMemcpyDeviceToHost); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0)); + + // CHECK: stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size); + cudaMemcpyFromSymbolAsync(h_A, "constData", size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream)); + + /// memcpy to symbol + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol(constData, h_A, size, 1); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol("constData", h_A, size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbol(constData, h_A, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbol(constData, h_A, size, 1)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol("constData", h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size).wait())); + MY_SAFE_CALL(cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice)); + + /// memcpy to symbol async + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size, 2, cudaMemcpyHostToDevice, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0)); + + // CHECK: stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream)); + + /// memset + + // CHECK: q_ct1.memset(d_A, 23, size).wait(); + cudaMemset(d_A, 23, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size).wait()); + errorCode = cudaMemset(d_A, 23, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size).wait())); + MY_SAFE_CALL(cudaMemset(d_A, 23, size)); + + /// memset async + + // CHECK: q_ct1.memset(d_A, 23, size); + cudaMemsetAsync(d_A, 23, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size)); + errorCode = cudaMemsetAsync(d_A, 23, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size))); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size)); + + // CHECK: q_ct1.memset(d_A, 23, size); + cudaMemsetAsync(d_A, 23, size, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size)); + errorCode = cudaMemsetAsync(d_A, 23, size, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size))); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size, 0)); + + // CHECK: stream->memset(d_A, 23, size); + cudaMemsetAsync(d_A, 23, size, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memset(d_A, 23, size)); + errorCode = cudaMemsetAsync(d_A, 23, size, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memset(d_A, 23, size))); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size, stream)); + + // CHECK: syclcompat::memset(d_A, size, 0xf, size, size); + cudaMemset2D(d_A, size, 0xf, size, size); + // CHECK: syclcompat::memset(p_A, 0xf, e); + cudaMemset3D(p_A, 0xf, e); + + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size); + cudaMemset2DAsync(d_A, size, 0xf, size, size); + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size); + cudaMemset2DAsync(d_A, size, 0xf, size, size, 0); + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size, *stream); + cudaMemset2DAsync(d_A, size, 0xf, size, size, stream); + + // CHECK: syclcompat::memset_async(p_A, 0xf, e); + cudaMemset3DAsync(p_A, 0xf, e); + // CHECK: syclcompat::memset_async(p_A, 0xf, e); + cudaMemset3DAsync(p_A, 0xf, e, 0); + // CHECK: syclcompat::memset_async(p_A, 0xf, e, *stream); + cudaMemset3DAsync(p_A, 0xf, e, stream); + + // CHECK: sycl::free(h_A, q_ct1); + cudaFreeHost(h_A); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(sycl::free(h_A, q_ct1)); + errorCode = cudaFreeHost(h_A); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(sycl::free(h_A, q_ct1))); + MY_SAFE_CALL(cudaFreeHost(h_A)); + + // CHECK: *(void **)&d_A = (float *)h_A; + cudaHostGetDevicePointer((void **)&d_A, h_A, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(d_A = (float *)h_A); + errorCode = cudaHostGetDevicePointer(&d_A, h_A, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(d_A = (float *)h_A)); + MY_SAFE_CALL(cudaHostGetDevicePointer(&d_A, h_A, 0)); + + // CHECK: *D_ptr = (syclcompat::device_ptr)h_A; + cuMemHostGetDevicePointer(D_ptr, h_A, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(*D_ptr = (syclcompat::device_ptr)h_A); + errorCode = cuMemHostGetDevicePointer(D_ptr, h_A, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(*D_ptr = (syclcompat::device_ptr)h_A)); + MY_SAFE_CALL(cuMemHostGetDevicePointer(D_ptr, h_A, 0)); + + cudaHostRegister(h_A, size, 0); + // CHECK: errorCode = 0; + errorCode = cudaHostRegister(h_A, size, 0); + // CHECK: MY_SAFE_CALL(0); + MY_SAFE_CALL(cudaHostRegister(h_A, size, 0)); + + cudaHostUnregister(h_A); + // CHECK: errorCode = 0; + errorCode = cudaHostUnregister(h_A); + // CHECK: MY_SAFE_CALL(0); + MY_SAFE_CALL(cudaHostUnregister(h_A)); +} + + +template +int foo2() { + // CHECK: syclcompat::device_ext &dev_ct1 = syclcompat::get_current_device(); + // CHECK-NEXT: sycl::queue &q_ct1 = *dev_ct1.default_queue(); + size_t size = 1234567 * sizeof(float); + float *h_A = (float *)malloc(size); + float *d_A = NULL; + int errorCode; + + cudaStream_t stream; + /// memcpy from symbol + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, constData, size, 1); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, "constData", size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbol(h_A, constData, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbol(h_A, constData, size, 1)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost)); + + // CHECK: q_ct1.memcpy(h_A, constData.get_ptr(), size); + cudaMemcpyFromSymbol(h_A, constData, size); + // CHECK: q_ct1.memcpy(h_A, constData.get_ptr(), size); + cudaMemcpyFromSymbol(h_A, "constData", size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, constData.get_ptr(), size)); + errorCode = cudaMemcpyFromSymbol(h_A, constData, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, constData.get_ptr(), size).wait())); + MY_SAFE_CALL(cudaMemcpyFromSymbol(h_A, constData, size)); + + /// memcpy from symbol async + + // CHECK: q_ct1.memcpy(h_A, constData.get_ptr(), size); + cudaMemcpyFromSymbolAsync(h_A, constData, size); + // CHECK: q_ct1.memcpy(h_A, constData.get_ptr(), size); + cudaMemcpyFromSymbolAsync(h_A, "constData", size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, constData.get_ptr(), size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, constData.get_ptr(), size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 1); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbolAsync(h_A, "constData", size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 1)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0)); + + // CHECK: stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size); + cudaMemcpyFromSymbolAsync(h_A, "constData", size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream)); + + /// memcpy to symbol + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol(constData, h_A, size, 1); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol("constData", h_A, size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbol(constData, h_A, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbol(constData, h_A, size, 1)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice)); + + // CHECK: q_ct1.memcpy(constData.get_ptr(), h_A, size); + cudaMemcpyToSymbol(constData, h_A, size); + // CHECK: q_ct1.memcpy(constData.get_ptr(), h_A, size); + cudaMemcpyToSymbol("constData", h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), h_A, size)); + errorCode = cudaMemcpyToSymbol(constData, h_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), h_A, size).wait())); + MY_SAFE_CALL(cudaMemcpyToSymbol(constData, h_A, size)); + + /// memcpy to symbol async + // CHECK: q_ct1.memcpy(constData.get_ptr(), h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size); + // CHECK: q_ct1.memcpy(constData.get_ptr(), h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0)); + + // CHECK: stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream)); +} + +template int foo2(); +template int foo2(); + +void foo3() { + size_t size = 1234567 * sizeof(float); + float *h_A = (float *)malloc(size); + float *d_A = NULL; + int errorCode; + cudaPitchedPtr p_A; + cudaExtent e; + cudaMemcpy3DParms parms; + int *data; + size_t width, height, depth, pitch, woffset, hoffset; + cudaArray_t a1; + int deviceID = 0; + + // CHECK: auto s1 = std::make_shared((syclcompat::queue_ptr)&q_ct1); + // CHECK: auto s2 = std::make_shared(&q_ct1); + // CHECK: auto s3 = std::make_shared(&q_ct1); + auto s1 = std::make_shared((cudaStream_t)cudaStreamDefault); + auto s2 = std::make_shared(cudaStreamLegacy); + auto s3 = std::make_shared(cudaStreamPerThread); + + // CHECK: q_ct1.memcpy(d_A, h_A, size); + // CHECK: q_ct1.memcpy(d_A, h_A, size); + // CHECK: q_ct1.memcpy(d_A, h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamDefault); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamLegacy); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamPerThread); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamDefault); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamLegacy); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamPerThread)); + + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamDefault); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamLegacy); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamPerThread); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamDefault); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamLegacy); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamPerThread)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size))); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamDefault); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamLegacy); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamPerThread); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamDefault); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamLegacy); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamPerThread)); + + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size); + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size); + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size))); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamDefault); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamLegacy); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamPerThread); + errorCode = cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamDefault); + errorCode = cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamLegacy); + errorCode = cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamPerThread)); + + // CHECK: syclcompat::memcpy_async(parms); + // CHECK: syclcompat::memcpy_async(parms); + // CHECK: syclcompat::memcpy_async(parms); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms))); + cudaMemcpy3DAsync(&parms, cudaStreamDefault); + cudaMemcpy3DAsync(&parms, cudaStreamLegacy); + cudaMemcpy3DAsync(&parms, cudaStreamPerThread); + errorCode = cudaMemcpy3DAsync(&parms, cudaStreamDefault); + errorCode = cudaMemcpy3DAsync(&parms, cudaStreamLegacy); + errorCode = cudaMemcpy3DAsync(&parms, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpy3DAsync(&parms, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpy3DAsync(&parms, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpy3DAsync(&parms, cudaStreamPerThread)); + + + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault); + cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy); + cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread); + errorCode = cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault); + errorCode = cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy); + errorCode = cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread)); + + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault); + cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy); + cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread); + errorCode = cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault); + errorCode = cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy); + errorCode = cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread)); + + + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamDefault); + cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamLegacy); + cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamPerThread); + errorCode = cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamDefault); + errorCode = cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamLegacy); + errorCode = cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamPerThread)); + + + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamDefault); + cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamLegacy); + cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamPerThread); + errorCode = cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamDefault); + errorCode = cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamLegacy); + errorCode = cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamPerThread)); + + + // CHECK: q_ct1.memset(d_A, 23, size); + // CHECK: q_ct1.memset(d_A, 23, size); + // CHECK: q_ct1.memset(d_A, 23, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size))); + cudaMemsetAsync(d_A, 23, size, cudaStreamDefault); + cudaMemsetAsync(d_A, 23, size, cudaStreamLegacy); + cudaMemsetAsync(d_A, 23, size, cudaStreamPerThread); + errorCode = cudaMemsetAsync(d_A, 23, size, cudaStreamDefault); + errorCode = cudaMemsetAsync(d_A, 23, size, cudaStreamLegacy); + errorCode = cudaMemsetAsync(d_A, 23, size, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size, cudaStreamPerThread)); + + + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size); + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size); + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size))); + cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamDefault); + cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamLegacy); + cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamPerThread); + errorCode = cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamDefault); + errorCode = cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamLegacy); + errorCode = cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamPerThread)); + + + // CHECK: syclcompat::memset_async(p_A, 0xf, e); + // CHECK: syclcompat::memset_async(p_A, 0xf, e); + // CHECK: syclcompat::memset_async(p_A, 0xf, e); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e))); + cudaMemset3DAsync(p_A, 0xf, e, cudaStreamDefault); + cudaMemset3DAsync(p_A, 0xf, e, cudaStreamLegacy); + cudaMemset3DAsync(p_A, 0xf, e, cudaStreamPerThread); + errorCode = cudaMemset3DAsync(p_A, 0xf, e, cudaStreamDefault); + errorCode = cudaMemset3DAsync(p_A, 0xf, e, cudaStreamLegacy); + errorCode = cudaMemset3DAsync(p_A, 0xf, e, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemset3DAsync(p_A, 0xf, e, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemset3DAsync(p_A, 0xf, e, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemset3DAsync(p_A, 0xf, e, cudaStreamPerThread)); + + + // CHECK: syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100); + // CHECK: syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100); + // CHECK: syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100))); + cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamDefault); + cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamLegacy); + cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamPerThread); + errorCode = cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamDefault); + errorCode = cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamLegacy); + errorCode = cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamPerThread)); + // CHECK: int cudevice = 0; + CUdevice cudevice = 0; + // CHECK: syclcompat::device_ptr devPtr; + CUdeviceptr devPtr; + // CHECK: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100); + // CHECK: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100); + // CHECK: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault); + cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy); + cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread); + errorCode = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault); + errorCode = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy); + errorCode = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread); + MY_SAFE_CALL(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault)); + MY_SAFE_CALL(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy)); + MY_SAFE_CALL(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread)); +} + +/// cuda driver memory api +void foo4(){ + size_t size = 1234567 * sizeof(float); + float *h_A = (float *)malloc(size); + + int errorCode; + // CHECK: /* + // CHECK: DPCT1048:{{[0-9]+}}: The original value CU_MEMHOSTALLOC_PORTABLE is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK: */ + // CHECK: h_A = (float *)sycl::malloc_host(size, q_ct1); + cuMemHostAlloc((void **)&h_A, size, CU_MEMHOSTALLOC_PORTABLE); + // CHECK: /* + // CHECK: DPCT1048:{{[0-9]+}}: The original value CU_MEMHOSTALLOC_PORTABLE is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK: */ + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1)); + errorCode = cuMemHostAlloc((void **)&h_A, size, CU_MEMHOSTALLOC_PORTABLE); + // CHECK: /* + // CHECK: DPCT1048:{{[0-9]+}}: The original value CU_MEMHOSTALLOC_PORTABLE is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK: */ + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1))); + MY_SAFE_CALL(cuMemHostAlloc((void **)&h_A, size, CU_MEMHOSTALLOC_PORTABLE)); + // CHECK: /* + // CHECK: DPCT1048:{{[0-9]+}}: The original value CU_MEMHOSTALLOC_PORTABLE is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK: */ + // CHECK: h_A = (float *)sycl::malloc_host(sizeof(sycl::double2) - size, q_ct1); + cuMemHostAlloc((void **)&h_A, sizeof(double2) - size, CU_MEMHOSTALLOC_PORTABLE); + // CHECK: /* + // CHECK: DPCT1048:{{[0-9]+}}: The original value CU_MEMHOSTALLOC_PORTABLE is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK: */ + // CHECK: h_A = (float *)sycl::malloc_host(sizeof(sycl::uchar4) - size, q_ct1); + cuMemHostAlloc((void **)&h_A, sizeof(uchar4) - size, CU_MEMHOSTALLOC_PORTABLE); +} + +#define MY_SAFE_CALL3(CALL) { \ + cudaError Error = CALL; \ + if (Error != cudaSuccess) { \ + printf("%s\n", cudaGetErrorString(Error)); \ + exit(Error); \ + } \ +} + +void foo5(float* a) { +// CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); +// CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); +// CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); +} + + +void foo6(float* a) { + // CHECK: printf("%d\n", SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + // CHECK: printf("%d\n", SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + printf("%d\n", cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + printf("%d\n", cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); +} + +__global__ void test_kernel() {} + +int foo7() { + unsigned int mem_size; + unsigned int *h_out_data; + unsigned int *h_data; + unsigned int *d_out_data; + unsigned int *d_in_data_1; + unsigned int *d_in_data_2; + int num_data; + + for (unsigned int i = 0; i < num_data; i++) + h_data[i] = i; + // CHECK: q_ct1.memcpy(d_in_data_1, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_1, h_data, mem_size, cudaMemcpyHostToDevice); + + for (unsigned int i = 0; i < num_data; i++) + h_data[i] = num_data - 1 - i; + // CHECK: q_ct1.memcpy(d_in_data_2, h_data, mem_size); + cudaMemcpy(d_in_data_2, h_data, mem_size, cudaMemcpyHostToDevice); + + test_kernel<<<3, 3>>>(); + cudaDeviceSynchronize(); + // CHECK: q_ct1.memcpy(h_out_data, d_out_data, mem_size).wait(); + cudaMemcpy(h_out_data, d_out_data, mem_size, cudaMemcpyDeviceToHost); + + return 0; +} + +int foo8() { + unsigned int mem_size; + unsigned int *h_data; + unsigned int *d_in_data_1; + unsigned int *d_in_data_2; + + // CHECK: q_ct1.memcpy(d_in_data_1, h_data, mem_size); + cudaMemcpy(d_in_data_1, h_data, mem_size, cudaMemcpyHostToDevice); + // CHECK: q_ct1.memcpy(d_in_data_2, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_2, h_data, mem_size, cudaMemcpyHostToDevice); + return 0; +} + +int foo9() { + unsigned int mem_size; + unsigned int *h_data; + unsigned int *d_in_data_1; + unsigned int *d_in_data_2; + unsigned int *test = d_in_data_1; + + // CHECK: q_ct1.memcpy(d_in_data_1, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_1, h_data, mem_size, cudaMemcpyHostToDevice); + test; + // CHECK: q_ct1.memcpy(d_in_data_2, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_2, h_data, mem_size, cudaMemcpyHostToDevice); + return 0; +} + +int foo10(unsigned int *test) { + unsigned int mem_size; + unsigned int *data_d, *data_h; + + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size).wait(); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + test; + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size).wait(); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + return 0; +} + +unsigned int *global_test; + +int foo11() { + unsigned int mem_size; + unsigned int *data_d, *data_h; + + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size).wait(); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + global_test; + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size).wait(); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + return 0; +} + +struct TEST { + unsigned int t; + void call() { + unsigned int mem_size; + unsigned int *data_d, *data_h; + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size).wait(); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + } +}; + +int foo12() { + TEST test; + return 0; +} + +void foo13(float* a, bool flag) { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + while(flag) { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + if(flag) { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpyToSymbol(constData, a, 16)); + } else { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpyToSymbol(constData, a, 16)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + } + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + } + + do { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, constData.get_ptr(), 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpyFromSymbol(a, constData, 16)); + } while(flag); + + for(;;) { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, constData.get_ptr(), 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpyFromSymbol(a, constData, 16)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + } + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); +} + +void foo14() { + int h_selected_num; + int *d_selected_num; + int *h_out; + int *d_out; + //CHECK:q_ct1.memcpy((void *)&h_selected_num, (void *)d_selected_num, sizeof(int)).wait(); + //CHECK-NEXT:q_ct1.memcpy((void *)h_out, (void *)d_out, h_selected_num * sizeof(int)).wait(); + cudaMemcpy((void *)&h_selected_num, (void *)d_selected_num, sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy((void *)h_out, (void *)d_out, h_selected_num * sizeof(int), cudaMemcpyDeviceToHost); +} + +struct TEST_STR { + int a[10]; +}; + +void foo15() { + std::vector buf; + for (int i = 0; i < 32; i++) { + //CHECK: buf[i] = (volatile TEST_STR *)sycl::malloc_host(sizeof(TEST_STR), syclcompat::get_default_queue()); + cudaMallocHost(&buf[i], sizeof(TEST_STR)); + } +} + +void foo16() { + std::vector buf; + for (int i = 0; i < 32; i++) { + //CHECK: (buf.front()) = (volatile TEST_STR *)sycl::malloc_host(sizeof(TEST_STR), syclcompat::get_default_queue()); + cudaMallocHost(&buf.front(), sizeof(TEST_STR)); + } +} + +int foo17() { + unsigned int mem_size; + unsigned int *h_data; + unsigned int *d_in_data_1; + unsigned int *d_in_data_2; + + // CHECK: q_ct1.memcpy(d_in_data_1, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_1, h_data, mem_size, cudaMemcpyHostToDevice); + h_data[0] = 1; + // CHECK: q_ct1.memcpy(d_in_data_2, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_2, h_data, mem_size, cudaMemcpyHostToDevice); + return 0; +} From 8fc2b69029a63a439d508abafe663d437a38bab8 Mon Sep 17 00:00:00 2001 From: Ziran Zhang Date: Thu, 29 Aug 2024 11:18:22 +0800 Subject: [PATCH 3/5] Fix lit issues --- clang/lib/DPCT/ASTTraversal.cpp | 9 +- clang/lib/DPCT/ASTTraversal.h | 6 +- clang/lib/DPCT/CallExprRewriterMemory.cpp | 12 +- clang/lib/DPCT/MapNames.cpp | 366 +++++++++++------- .../lib/DPCT/Rewriters/RewriterSYCLcompat.cpp | 4 + clang/test/dpct/driver-mem-syclcompat.cu | 30 +- clang/test/dpct/usm-syclcompat.cu | 36 +- 7 files changed, 277 insertions(+), 186 deletions(-) diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index 8d728db89e25..b62d33f5d29a 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -10250,6 +10250,8 @@ void MemoryMigrationRule::memcpyMigration( // Detect if there is Async in the func name and crop the async substr std::string NameRef = Name; bool IsAsync = false; + // Whether in experimental namespace in syclcompat. + bool IsExperimentalInSYCLCompat = false; size_t AsyncLoc = NameRef.find("Async"); if (AsyncLoc != std::string::npos) { IsAsync = true; @@ -10268,6 +10270,7 @@ void MemoryMigrationRule::memcpyMigration( llvm::raw_string_ostream OS(Replacement); DerefExpr(C->getArg(0), C).print(OS); emplaceTransformation(new ReplaceStmt(C->getArg(0), Replacement)); + IsExperimentalInSYCLCompat = true; } else if (!NameRef.compare("cudaMemcpy") || NameRef.rfind("cuMemcpyDtoH", 0) == 0) { if (!NameRef.compare("cudaMemcpy")) { @@ -10345,10 +10348,12 @@ void MemoryMigrationRule::memcpyMigration( if (ReplaceStr.empty()) { if (IsAsync) { - ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName("memcpy_async"); + ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName( + "memcpy_async", IsExperimentalInSYCLCompat); requestFeature(HelperFeatureEnum::device_ext); } else { - ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName("memcpy"); + ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName( + "memcpy", IsExperimentalInSYCLCompat); requestFeature(HelperFeatureEnum::device_ext); } } diff --git a/clang/lib/DPCT/ASTTraversal.h b/clang/lib/DPCT/ASTTraversal.h index cd39e793a764..d2678b3eef1f 100644 --- a/clang/lib/DPCT/ASTTraversal.h +++ b/clang/lib/DPCT/ASTTraversal.h @@ -1348,7 +1348,11 @@ class MemoryMigrationRule : public NamedMigrationRule { /// functions and w/o in syclcompat. /// If has "_async" suffix, the name in dpct helper function will have /// 'async_' prefix and remove the suffix. - static std::string getMemoryHelperFunctionName(StringRef RawName); + /// If `ExperimentalInSYCLCompat` is true, will add `experimental` namespace + /// in syclcompat. + static std::string + getMemoryHelperFunctionName(StringRef RawName, + bool ExperimentalInSYCLCompat = false); private: void mallocMigration(const ast_matchers::MatchFinder::MatchResult &Result, diff --git a/clang/lib/DPCT/CallExprRewriterMemory.cpp b/clang/lib/DPCT/CallExprRewriterMemory.cpp index 502cfce0e996..b46025163a45 100644 --- a/clang/lib/DPCT/CallExprRewriterMemory.cpp +++ b/clang/lib/DPCT/CallExprRewriterMemory.cpp @@ -16,7 +16,10 @@ namespace dpct { /// functions and w/o in syclcompat. /// If has "_async" suffix, the name in dpct helper function will have 'async_' /// prefix and remove the suffix. -std::string getMemoryHelperFunctionName(StringRef RawName) { +/// If `ExperimentalInSYCLCompat` is true, will add `experimental` namespace +/// in syclcompat. +std::string getMemoryHelperFunctionName(StringRef RawName, + bool ExperimentalInSYCLCompat = false) { const static std::string AsyncSuffix = "_async"; const static std::string AsyncPrefix = "async_"; @@ -29,13 +32,16 @@ std::string getMemoryHelperFunctionName(StringRef RawName) { OS << AsyncPrefix; } OS << "dpct_"; + } else if (ExperimentalInSYCLCompat) { + OS << "experimental::"; } OS << RawName; return Result; } -std::string MemoryMigrationRule::getMemoryHelperFunctionName(StringRef Name) { - return dpct::getMemoryHelperFunctionName(Name); +std::string MemoryMigrationRule::getMemoryHelperFunctionName( + StringRef Name, bool ExperimentalInSYCLCompat) { + return dpct::getMemoryHelperFunctionName(Name, ExperimentalInSYCLCompat); } // clang-format off diff --git a/clang/lib/DPCT/MapNames.cpp b/clang/lib/DPCT/MapNames.cpp index eb149febeed5..5a5989ce5ce3 100644 --- a/clang/lib/DPCT/MapNames.cpp +++ b/clang/lib/DPCT/MapNames.cpp @@ -329,7 +329,8 @@ void MapNames::setExplicitNamespaceMap( std::make_shared(getDpctNamespace() + "kernel_function", HelperFeatureEnum::device_ext)}, {"CUpointer_attribute", - std::make_shared(getDpctNamespace() + "pointer_attributes::type")}, + std::make_shared(getDpctNamespace() + + "pointer_attributes::type")}, {"cudaPointerAttributes", std::make_shared(getDpctNamespace() + "pointer_attributes", HelperFeatureEnum::device_ext)}, @@ -407,23 +408,25 @@ void MapNames::setExplicitNamespaceMap( {"ushort2", std::make_shared(getClNamespace() + "ushort2")}, {"ushort3", std::make_shared(getClNamespace() + "ushort3")}, {"ushort4", std::make_shared(getClNamespace() + "ushort4")}, - {"cublasHandle_t", std::make_shared( - getLibraryHelperNamespace() + "blas::descriptor_ptr", - HelperFeatureEnum::device_ext)}, + {"cublasHandle_t", + std::make_shared(getLibraryHelperNamespace() + + "blas::descriptor_ptr", + HelperFeatureEnum::device_ext)}, {"cublasStatus_t", std::make_shared("int")}, {"cublasStatus", std::make_shared("int")}, {"cublasGemmAlgo_t", std::make_shared("int")}, - {"cudaDataType_t", - std::make_shared(getLibraryHelperNamespace() + "library_data_t", - HelperFeatureEnum::device_ext)}, - {"cudaDataType", - std::make_shared(getLibraryHelperNamespace() + "library_data_t", - HelperFeatureEnum::device_ext)}, - {"cublasDataType_t", - std::make_shared(getLibraryHelperNamespace() + "library_data_t", - HelperFeatureEnum::device_ext)}, + {"cudaDataType_t", std::make_shared( + getLibraryHelperNamespace() + "library_data_t", + HelperFeatureEnum::device_ext)}, + {"cudaDataType", std::make_shared( + getLibraryHelperNamespace() + "library_data_t", + HelperFeatureEnum::device_ext)}, + {"cublasDataType_t", std::make_shared( + getLibraryHelperNamespace() + "library_data_t", + HelperFeatureEnum::device_ext)}, {"cublasComputeType_t", - std::make_shared(getLibraryHelperNamespace() + "compute_type")}, + std::make_shared(getLibraryHelperNamespace() + + "compute_type")}, {"cuComplex", std::make_shared(getClNamespace() + "float2")}, {"cuFloatComplex", @@ -437,8 +440,8 @@ void MapNames::setExplicitNamespaceMap( std::make_shared("oneapi::mkl::transpose")}, {"cublasPointerMode_t", std::make_shared("int")}, {"cublasAtomicsMode_t", std::make_shared("int")}, - {"cublasMath_t", - std::make_shared(getLibraryHelperNamespace() + "blas::math_mode")}, + {"cublasMath_t", std::make_shared( + getLibraryHelperNamespace() + "blas::math_mode")}, {"cusparsePointerMode_t", std::make_shared("int")}, {"cusparseFillMode_t", std::make_shared("oneapi::mkl::uplo")}, @@ -454,7 +457,8 @@ void MapNames::setExplicitNamespaceMap( std::make_shared("oneapi::mkl::transpose")}, {"cusparseAlgMode_t", std::make_shared("int")}, {"cusparseSolveAnalysisInfo_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::optimize_info>", HelperFeatureEnum::device_ext)}, {"thrust::device_ptr", @@ -566,25 +570,31 @@ void MapNames::setExplicitNamespaceMap( {"cudaTextureFilterMode", std::make_shared(getClNamespace() + "filtering_mode")}, {"curandGenerator_t", - std::make_shared(getLibraryHelperNamespace() + "rng::host_rng_ptr", + std::make_shared(getLibraryHelperNamespace() + + "rng::host_rng_ptr", + HelperFeatureEnum::device_ext)}, + {"curandRngType_t", + std::make_shared(getLibraryHelperNamespace() + + "rng::random_engine_type", + HelperFeatureEnum::device_ext)}, + {"curandRngType", + std::make_shared(getLibraryHelperNamespace() + + "rng::random_engine_type", HelperFeatureEnum::device_ext)}, - {"curandRngType_t", std::make_shared( - getLibraryHelperNamespace() + "rng::random_engine_type", - HelperFeatureEnum::device_ext)}, - {"curandRngType", std::make_shared( - getLibraryHelperNamespace() + "rng::random_engine_type", - HelperFeatureEnum::device_ext)}, {"curandStatus_t", std::make_shared("int")}, {"curandStatus", std::make_shared("int")}, {"curandOrdering_t", - std::make_shared(getLibraryHelperNamespace() + "rng::random_mode")}, + std::make_shared(getLibraryHelperNamespace() + + "rng::random_mode")}, {"cusparseStatus_t", std::make_shared("int")}, {"cusparseMatDescr_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::matrix_info>", HelperFeatureEnum::device_ext)}, - {"cusparseHandle_t", std::make_shared( - getLibraryHelperNamespace() + "sparse::descriptor_ptr")}, + {"cusparseHandle_t", + std::make_shared(getLibraryHelperNamespace() + + "sparse::descriptor_ptr")}, {"cudaMemoryAdvise", std::make_shared("int")}, {"cudaStreamCaptureStatus", std::make_shared( @@ -603,7 +613,10 @@ void MapNames::setExplicitNamespaceMap( std::make_shared(getDpctNamespace() + "pitched_data", HelperFeatureEnum::device_ext)}, {"cudaMemcpyKind", - std::make_shared(getDpctNamespace() + "memcpy_direction")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_direction")}, {"CUDA_ARRAY3D_DESCRIPTOR", std::make_shared( DpctGlobalInfo::useExtBindlessImages() @@ -617,15 +630,30 @@ void MapNames::setExplicitNamespaceMap( "ext::oneapi::experimental::image_descriptor" : getDpctNamespace() + "image_matrix_desc")}, {"cudaMemcpy3DParms", - std::make_shared(getDpctNamespace() + "memcpy_parameter")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_parameter")}, {"CUDA_MEMCPY3D", - std::make_shared(getDpctNamespace() + "memcpy_parameter")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_parameter")}, {"cudaMemcpy3DPeerParms", - std::make_shared(getDpctNamespace() + "memcpy_parameter")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_parameter")}, {"CUDA_MEMCPY3D_PEER", - std::make_shared(getDpctNamespace() + "memcpy_parameter")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_parameter")}, {"CUDA_MEMCPY2D", - std::make_shared(getDpctNamespace() + "memcpy_parameter")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_parameter")}, {"cudaComputeMode", std::make_shared("int")}, {"cudaSharedMemConfig", std::make_shared("int")}, {"cufftReal", std::make_shared("float")}, @@ -636,12 +664,12 @@ void MapNames::setExplicitNamespaceMap( std::make_shared(getClNamespace() + "double2")}, {"cufftResult_t", std::make_shared("int")}, {"cufftResult", std::make_shared("int")}, - {"cufftType_t", - std::make_shared(getLibraryHelperNamespace() + "fft::fft_type", - HelperFeatureEnum::device_ext)}, - {"cufftType", - std::make_shared(getLibraryHelperNamespace() + "fft::fft_type", - HelperFeatureEnum::device_ext)}, + {"cufftType_t", std::make_shared( + getLibraryHelperNamespace() + "fft::fft_type", + HelperFeatureEnum::device_ext)}, + {"cufftType", std::make_shared( + getLibraryHelperNamespace() + "fft::fft_type", + HelperFeatureEnum::device_ext)}, {"cufftHandle", std::make_shared( getLibraryHelperNamespace() + "fft::fft_engine_ptr", HelperFeatureEnum::device_ext)}, @@ -709,17 +737,18 @@ void MapNames::setExplicitNamespaceMap( getClNamespace() + "marray<" + getClNamespace() + "ext::oneapi::bfloat16, 2>")}, {"libraryPropertyType_t", - std::make_shared(getLibraryHelperNamespace() + "version_field", - HelperFeatureEnum::device_ext)}, - {"libraryPropertyType", - std::make_shared(getLibraryHelperNamespace() + "version_field", + std::make_shared(getLibraryHelperNamespace() + + "version_field", HelperFeatureEnum::device_ext)}, + {"libraryPropertyType", std::make_shared( + getLibraryHelperNamespace() + "version_field", + HelperFeatureEnum::device_ext)}, {"ncclUniqueId", std::make_shared("oneapi::ccl::kvs::address_type", HelperFeatureEnum::device_ext)}, - {"ncclComm_t", - std::make_shared(getLibraryHelperNamespace() + "ccl::comm_ptr", - HelperFeatureEnum::device_ext)}, + {"ncclComm_t", std::make_shared( + getLibraryHelperNamespace() + "ccl::comm_ptr", + HelperFeatureEnum::device_ext)}, {"ncclRedOp_t", std::make_shared("oneapi::ccl::reduction")}, {"ncclDataType_t", std::make_shared("oneapi::ccl::datatype")}, @@ -733,22 +762,28 @@ void MapNames::setExplicitNamespaceMap( {"CUuuid", std::make_shared("std::array")}, {"cusparseIndexType_t", - std::make_shared(getLibraryHelperNamespace() + "library_data_t")}, - {"cusparseFormat_t", std::make_shared( - getLibraryHelperNamespace() + "sparse::matrix_format")}, + std::make_shared(getLibraryHelperNamespace() + + "library_data_t")}, + {"cusparseFormat_t", + std::make_shared(getLibraryHelperNamespace() + + "sparse::matrix_format")}, {"cusparseDnMatDescr_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::dense_matrix_desc>")}, {"cusparseConstDnMatDescr_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::dense_matrix_desc>")}, {"cusparseOrder_t", std::make_shared("oneapi::mkl::layout")}, {"cusparseDnVecDescr_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::dense_vector_desc>")}, {"cusparseConstDnVecDescr_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::dense_vector_desc>")}, {"cusparseSpMatDescr_t", std::make_shared(getLibraryHelperNamespace() + @@ -773,21 +808,22 @@ void MapNames::setExplicitNamespaceMap( {"cudaLaunchAttributeValue", std::make_shared("int")}, {"cusparseSpSMDescr_t", std::make_shared("int")}, {"cusparseSpSMAlg_t", std::make_shared("int")}, - {"cublasLtHandle_t", - std::make_shared( - getLibraryHelperNamespace() + "blas_gemm::experimental::descriptor_ptr")}, - {"cublasLtMatmulDesc_t", - std::make_shared( - getLibraryHelperNamespace() + "blas_gemm::experimental::matmul_desc_ptr")}, + {"cublasLtHandle_t", std::make_shared( + getLibraryHelperNamespace() + + "blas_gemm::experimental::descriptor_ptr")}, + {"cublasLtMatmulDesc_t", std::make_shared( + getLibraryHelperNamespace() + + "blas_gemm::experimental::matmul_desc_ptr")}, {"cublasLtOrder_t", std::make_shared(getLibraryHelperNamespace() + "blas_gemm::experimental::order_t")}, - {"cublasLtPointerMode_t", - std::make_shared( - getLibraryHelperNamespace() + "blas_gemm::experimental::pointer_mode_t")}, + {"cublasLtPointerMode_t", std::make_shared( + getLibraryHelperNamespace() + + "blas_gemm::experimental::pointer_mode_t")}, {"cublasLtMatrixLayout_t", std::make_shared( - getLibraryHelperNamespace() + "blas_gemm::experimental::matrix_layout_ptr")}, + getLibraryHelperNamespace() + + "blas_gemm::experimental::matrix_layout_ptr")}, {"cublasLtMatrixLayoutAttribute_t", std::make_shared( getLibraryHelperNamespace() + @@ -805,7 +841,8 @@ void MapNames::setExplicitNamespaceMap( std::make_shared("int")}, {"cublasLtMatrixTransformDesc_t", std::make_shared( - getLibraryHelperNamespace() + "blas_gemm::experimental::transform_desc_ptr")}, + getLibraryHelperNamespace() + + "blas_gemm::experimental::transform_desc_ptr")}, {"cudaGraphicsMapFlags", std::make_shared("int")}, {"cudaGraphicsRegisterFlags", std::make_shared("int")}, // ... @@ -1281,15 +1318,30 @@ void MapNames::setExplicitNamespaceMap( HelperFeatureEnum::device_ext)}, // enum Memcpy Kind {"cudaMemcpyHostToHost", - std::make_shared(getDpctNamespace() + "host_to_host")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "host_to_host")}, {"cudaMemcpyHostToDevice", - std::make_shared(getDpctNamespace() + "host_to_device")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "host_to_device")}, {"cudaMemcpyDeviceToHost", - std::make_shared(getDpctNamespace() + "device_to_host")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "device_to_host")}, {"cudaMemcpyDeviceToDevice", - std::make_shared(getDpctNamespace() + "device_to_device")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "device_to_device")}, {"cudaMemcpyDefault", - std::make_shared(getDpctNamespace() + "automatic")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "automatic")}, // enum cudaMemory Type {"cudaMemoryTypeHost", std::make_shared(getClNamespace() + "usm::alloc::host", @@ -1448,11 +1500,11 @@ void MapNames::setExplicitNamespaceMap( std::make_shared(getDpctNamespace() + "pointer_attributes::type::memory_type")}, {"CU_POINTER_ATTRIBUTE_DEVICE_POINTER", - std::make_shared(getDpctNamespace() + - "pointer_attributes::type::device_pointer")}, + std::make_shared( + getDpctNamespace() + "pointer_attributes::type::device_pointer")}, {"CU_POINTER_ATTRIBUTE_HOST_POINTER", - std::make_shared(getDpctNamespace() + - "pointer_attributes::type::host_pointer")}, + std::make_shared( + getDpctNamespace() + "pointer_attributes::type::host_pointer")}, {"CU_POINTER_ATTRIBUTE_P2P_TOKENS", std::make_shared(getDpctNamespace() + "pointer_attributes::type::unsupported")}, @@ -1494,7 +1546,7 @@ void MapNames::setExplicitNamespaceMap( {"CU_MEMORYTYPE_UNIFIED", std::make_shared(getClNamespace() + "usm::alloc::shared", HelperFeatureEnum::device_ext)}, - + // enum CUlimit {"CU_LIMIT_PRINTF_FIFO_SIZE", std::make_shared("INT_MAX")}, @@ -1556,75 +1608,99 @@ void MapNames::setExplicitNamespaceMap( "image_data_type::pitch", HelperFeatureEnum::device_ext)}, // enum libraryPropertyType_t - {"MAJOR_VERSION", std::make_shared( - getLibraryHelperNamespace() + "version_field::major", - HelperFeatureEnum::device_ext)}, - {"MINOR_VERSION", std::make_shared( - getLibraryHelperNamespace() + "version_field::update", - HelperFeatureEnum::device_ext)}, + {"MAJOR_VERSION", + std::make_shared(getLibraryHelperNamespace() + + "version_field::major", + HelperFeatureEnum::device_ext)}, + {"MINOR_VERSION", + std::make_shared(getLibraryHelperNamespace() + + "version_field::update", + HelperFeatureEnum::device_ext)}, {"PATCH_LEVEL", std::make_shared( getLibraryHelperNamespace() + "version_field::patch", HelperFeatureEnum::device_ext)}, // enum cudaDataType_t - {"CUDA_R_16F", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_half")}, - {"CUDA_C_16F", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_half")}, + {"CUDA_R_16F", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_half")}, + {"CUDA_C_16F", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_half")}, {"CUDA_R_16BF", std::make_shared(getLibraryHelperNamespace() + "library_data_t::real_bfloat16")}, {"CUDA_C_16BF", std::make_shared(getLibraryHelperNamespace() + "library_data_t::complex_bfloat16")}, - {"CUDA_R_32F", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_float")}, - {"CUDA_C_32F", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_float")}, - {"CUDA_R_64F", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_double")}, + {"CUDA_R_32F", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_float")}, + {"CUDA_C_32F", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_float")}, + {"CUDA_R_64F", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_double")}, {"CUDA_C_64F", std::make_shared(getLibraryHelperNamespace() + "library_data_t::complex_double")}, - {"CUDA_R_4I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_int4")}, - {"CUDA_C_4I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_int4")}, - {"CUDA_R_4U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_uint4")}, - {"CUDA_C_4U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_uint4")}, - {"CUDA_R_8I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_int8")}, - {"CUDA_C_8I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_int8")}, - {"CUDA_R_8U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_uint8")}, - {"CUDA_C_8U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_uint8")}, - {"CUDA_R_16I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_int16")}, - {"CUDA_C_16I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_int16")}, - {"CUDA_R_16U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_uint16")}, + {"CUDA_R_4I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_int4")}, + {"CUDA_C_4I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_int4")}, + {"CUDA_R_4U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_uint4")}, + {"CUDA_C_4U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_uint4")}, + {"CUDA_R_8I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_int8")}, + {"CUDA_C_8I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_int8")}, + {"CUDA_R_8U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_uint8")}, + {"CUDA_C_8U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_uint8")}, + {"CUDA_R_16I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_int16")}, + {"CUDA_C_16I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_int16")}, + {"CUDA_R_16U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_uint16")}, {"CUDA_C_16U", std::make_shared(getLibraryHelperNamespace() + "library_data_t::complex_uint16")}, - {"CUDA_R_32I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_int32")}, - {"CUDA_C_32I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_int32")}, - {"CUDA_R_32U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_uint32")}, + {"CUDA_R_32I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_int32")}, + {"CUDA_C_32I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_int32")}, + {"CUDA_R_32U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_uint32")}, {"CUDA_C_32U", std::make_shared(getLibraryHelperNamespace() + "library_data_t::complex_uint32")}, - {"CUDA_R_64I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_int64")}, - {"CUDA_C_64I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_int64")}, - {"CUDA_R_64U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_uint64")}, + {"CUDA_R_64I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_int64")}, + {"CUDA_C_64I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_int64")}, + {"CUDA_R_64U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_uint64")}, {"CUDA_C_64U", std::make_shared(getLibraryHelperNamespace() + "library_data_t::complex_uint64")}, @@ -1658,26 +1734,26 @@ void MapNames::setExplicitNamespaceMap( {"cuda::memory_order_seq_cst", std::make_shared(getClNamespace() + "memory_order::seq_cst")}, - {"CUFFT_R2C", - std::make_shared( - getLibraryHelperNamespace() + "fft::fft_type::real_float_to_complex_float", - HelperFeatureEnum::device_ext)}, - {"CUFFT_C2R", - std::make_shared( - getLibraryHelperNamespace() + "fft::fft_type::complex_float_to_real_float", - HelperFeatureEnum::device_ext)}, - {"CUFFT_D2Z", - std::make_shared( - getLibraryHelperNamespace() + "fft::fft_type::real_double_to_complex_double", - HelperFeatureEnum::device_ext)}, - {"CUFFT_Z2D", - std::make_shared( - getLibraryHelperNamespace() + "fft::fft_type::complex_double_to_real_double", - HelperFeatureEnum::device_ext)}, - {"CUFFT_C2C", - std::make_shared( - getLibraryHelperNamespace() + "fft::fft_type::complex_float_to_complex_float", - HelperFeatureEnum::device_ext)}, + {"CUFFT_R2C", std::make_shared( + getLibraryHelperNamespace() + + "fft::fft_type::real_float_to_complex_float", + HelperFeatureEnum::device_ext)}, + {"CUFFT_C2R", std::make_shared( + getLibraryHelperNamespace() + + "fft::fft_type::complex_float_to_real_float", + HelperFeatureEnum::device_ext)}, + {"CUFFT_D2Z", std::make_shared( + getLibraryHelperNamespace() + + "fft::fft_type::real_double_to_complex_double", + HelperFeatureEnum::device_ext)}, + {"CUFFT_Z2D", std::make_shared( + getLibraryHelperNamespace() + + "fft::fft_type::complex_double_to_real_double", + HelperFeatureEnum::device_ext)}, + {"CUFFT_C2C", std::make_shared( + getLibraryHelperNamespace() + + "fft::fft_type::complex_float_to_complex_float", + HelperFeatureEnum::device_ext)}, {"CUFFT_Z2Z", std::make_shared( getLibraryHelperNamespace() + "fft::fft_type::complex_double_to_complex_double", diff --git a/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp b/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp index 7e40e38da768..2d5f54e75b3e 100644 --- a/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp +++ b/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp @@ -57,6 +57,10 @@ SYCLCOMPAT_UNSUPPORT("cuMemcpyHtoAAsync_v2") SYCLCOMPAT_UNSUPPORT("cuMemcpyAtoD_v2") SYCLCOMPAT_UNSUPPORT("cuMemcpyDtoA_v2") SYCLCOMPAT_UNSUPPORT("cuMemcpyAtoA_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyPeer") +SYCLCOMPAT_UNSUPPORT("cuMemcpyPeerAsync") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyPeer") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyPeerAsync") }); } diff --git a/clang/test/dpct/driver-mem-syclcompat.cu b/clang/test/dpct/driver-mem-syclcompat.cu index 410db88affab..a92c6bcd5c1d 100644 --- a/clang/test/dpct/driver-mem-syclcompat.cu +++ b/clang/test/dpct/driver-mem-syclcompat.cu @@ -94,13 +94,12 @@ int main(){ // CHECK: r = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(f_D, f_D2, size)); r = cuMemcpyAsync(f_D, f_D2, size, 0); - // CHECK: syclcompat::memcpy(f_D, c1, f_D2, c2, size); +#ifndef BUILD_TEST + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cuMemcpyPeer" is not supported with SYCLcompat currently, please adjust the code manually. cuMemcpyPeer(f_D, c1, f_D2, c2, size); - // CHECK: /* - // CHECK-NEXT: DPCT1124:{{[0-9]+}}: cuMemcpyPeerAsync is migrated to asynchronous memcpy API. While the origin API might be synchronous, it depends on the type of operand memory, so you may need to call wait() on event return by memcpy API to ensure synchronization behavior. - // CHECK-NEXT: */ - // CHECK-NEXT: syclcompat::memcpy_async(f_D, c1, f_D2, c2, size, *stream); + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cuMemcpyPeerAsync" is not supported with SYCLcompat currently, please adjust the code manually. cuMemcpyPeerAsync(f_D, c1, f_D2, c2, size, stream); +#endif unsigned int v32 = 50000; unsigned short v16 = 20000; @@ -130,7 +129,7 @@ int main(){ cuMemsetD2D16Async(f_D, 1, v16, 4 * 2, 6, stream); cuMemsetD2D8Async(f_D, 1, v8, 4 * 4, 6, stream); - // CHECK: syclcompat::memcpy_parameter cpy; + // CHECK: syclcompat::experimental::memcpy_parameter cpy; // CHECK-NEXT: cpy.to.pitched.set_data_ptr(f_A); // CHECK-NEXT: cpy.to.pitched.set_pitch(20); // CHECK-NEXT: cpy.to.pos[1] = 10; @@ -159,9 +158,9 @@ int main(){ cpy.WidthInBytes = 4; cpy.Height = 7; - // CHECK: syclcompat::memcpy(cpy); + // CHECK: syclcompat::experimental::memcpy(cpy); cuMemcpy2D(&cpy); - // CHECK: syclcompat::memcpy_async(cpy, *stream); + // CHECK: syclcompat::experimental::memcpy_async(cpy, *stream); cuMemcpy2DAsync(&cpy, stream); CUdeviceptr devicePtr; @@ -258,9 +257,8 @@ int main(){ // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); cuCheckError(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread)); - // CHECK: syclcompat::memcpy_parameter cpy2; - // CHECK-EMPTY: - // CHECK-NEXT: /* + // CHECK: syclcompat::experimental::memcpy_parameter cpy2; + // CHECK: /* // CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "CUarray" is not supported with SYCLcompat currently, please adjust the code manually. // CHECK-NEXT: */ // CHECK-NEXT: CUarray ca; @@ -270,8 +268,7 @@ int main(){ // CHECK-NEXT: cpy2.to.pos[1] = 3; // CHECK-NEXT: cpy2.to.pos[2] = 2; // CHECK-NEXT: cpy2.to.pos[0] = 1; - // CHECK-EMPTY: - // CHECK-NEXT: cpy2.from.pitched.set_data_ptr(f_A); + // CHECK: cpy2.from.pitched.set_data_ptr(f_A); // CHECK-NEXT: cpy2.from.pitched.set_pitch(5); // CHECK-NEXT: cpy2.from.pitched.set_y(4); // CHECK-NEXT: cpy2.from.pos[1] = 3; @@ -282,7 +279,7 @@ int main(){ // CHECK-NEXT: cpy2.size[1] = 2; // CHECK-NEXT: cpy2.size[2] = 1; CUDA_MEMCPY3D cpy2; - +#ifndef BUILD_TEST CUarray ca; cpy2.dstMemoryType = CU_MEMORYTYPE_ARRAY; cpy2.dstArray = ca; @@ -292,6 +289,7 @@ int main(){ cpy2.dstZ = 2; cpy2.dstXInBytes = 1; cpy2.dstLOD = 0; +#endif cpy2.srcMemoryType = CU_MEMORYTYPE_HOST; cpy2.srcHost = f_A; @@ -306,11 +304,11 @@ int main(){ cpy2.Height = 2; cpy2.Depth = 1; - // CHECK: syclcompat::memcpy(cpy2); + // CHECK: syclcompat::experimental::memcpy(cpy2); cuMemcpy3D(&cpy2); CUstream cs; - // CHECK: syclcompat::memcpy_async(cpy2, *cs); + // CHECK: syclcompat::experimental::memcpy_async(cpy2, *cs); cuMemcpy3DAsync(&cpy2, cs); float *h_A = (float *)malloc(100); diff --git a/clang/test/dpct/usm-syclcompat.cu b/clang/test/dpct/usm-syclcompat.cu index 58490f9c2733..51eb05385680 100644 --- a/clang/test/dpct/usm-syclcompat.cu +++ b/clang/test/dpct/usm-syclcompat.cu @@ -183,13 +183,10 @@ void foo() { // CHECK: syclcompat::memcpy(h_A, size, d_A, size, size, size); cudaMemcpy2D(h_A, size, d_A, size, size, size, cudaMemcpyDeviceToHost); - // CHECK: syclcompat::memcpy(parms); + // CHECK: syclcompat::experimental::memcpy(parms); cudaMemcpy3D(&parms); -#ifndef BUILD_TEST - struct cudaMemcpy3DParms *parms_pointer; - // Followed call can't be processed. + // CHECK: syclcompat::experimental::memcpy(*(parms_pointer)); cudaMemcpy3D(parms_pointer); -#endif // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size); cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice); @@ -205,11 +202,11 @@ void foo() { // CHECK: syclcompat::memcpy_async(h_A, size, d_A, size, size, size, *stream); cudaMemcpy2DAsync(h_A, size, d_A, size, size, size, cudaMemcpyDeviceToHost, stream); - // CHECK: syclcompat::memcpy_async(parms); + // CHECK: syclcompat::experimental::memcpy_async(parms); cudaMemcpy3DAsync(&parms); - // CHECK: syclcompat::memcpy_async(parms); + // CHECK: syclcompat::experimental::memcpy_async(parms); cudaMemcpy3DAsync(&parms, 0); - // CHECK: syclcompat::memcpy_async(parms, *stream); + // CHECK: syclcompat::experimental::memcpy_async(parms, *stream); cudaMemcpy3DAsync(&parms, stream); /// memcpy from symbol @@ -554,7 +551,6 @@ void foo3() { cudaMemcpy3DParms parms; int *data; size_t width, height, depth, pitch, woffset, hoffset; - cudaArray_t a1; int deviceID = 0; // CHECK: auto s1 = std::make_shared((syclcompat::queue_ptr)&q_ct1); @@ -641,15 +637,15 @@ void foo3() { MY_SAFE_CALL(cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamLegacy)); MY_SAFE_CALL(cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamPerThread)); - // CHECK: syclcompat::memcpy_async(parms); - // CHECK: syclcompat::memcpy_async(parms); - // CHECK: syclcompat::memcpy_async(parms); - // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms)); - // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms)); - // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms)); - // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms))); - // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms))); - // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(parms))); + // CHECK: syclcompat::experimental::memcpy_async(parms); + // CHECK: syclcompat::experimental::memcpy_async(parms); + // CHECK: syclcompat::experimental::memcpy_async(parms); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms))); cudaMemcpy3DAsync(&parms, cudaStreamDefault); cudaMemcpy3DAsync(&parms, cudaStreamLegacy); cudaMemcpy3DAsync(&parms, cudaStreamPerThread); @@ -660,7 +656,8 @@ void foo3() { MY_SAFE_CALL(cudaMemcpy3DAsync(&parms, cudaStreamLegacy)); MY_SAFE_CALL(cudaMemcpy3DAsync(&parms, cudaStreamPerThread)); - +#ifndef BUILD_TEST + cudaArray_t a1; // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. @@ -738,6 +735,7 @@ void foo3() { MY_SAFE_CALL(cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamDefault)); MY_SAFE_CALL(cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamLegacy)); MY_SAFE_CALL(cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamPerThread)); +#endif // CHECK: q_ct1.memset(d_A, 23, size); From e4f8825d4dc7090b41d6025dbbbdc3eecfe75458 Mon Sep 17 00:00:00 2001 From: Ziran Zhang Date: Thu, 29 Aug 2024 15:32:12 +0800 Subject: [PATCH 4/5] Resolve conficts --- clang/test/dpct/driver-mem-syclcompat.cu | 36 ++++++++++++------------ clang/test/dpct/usm-syclcompat.cu | 36 ++++++++++++------------ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/clang/test/dpct/driver-mem-syclcompat.cu b/clang/test/dpct/driver-mem-syclcompat.cu index a92c6bcd5c1d..f02293366697 100644 --- a/clang/test/dpct/driver-mem-syclcompat.cu +++ b/clang/test/dpct/driver-mem-syclcompat.cu @@ -176,55 +176,55 @@ int main(){ // CHECK-NEXT: int advise = 0; CUmem_advise advise = CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION; - // CHECK: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise); + // CHECK: syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise); cuMemAdvise(devicePtr, count, advise, cudevice); - // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise))); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise))); cuCheckError(cuMemAdvise(devicePtr, count, advise, cudevice)); - // CHECK: cu_err = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise)); + // CHECK: cu_err = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise)); cu_err = cuMemAdvise(devicePtr, count, advise, cudevice); // CHECK: /* // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. // CHECK-NEXT: */ - // CHECK-NEXT: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0); + // CHECK-NEXT: syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0); cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice); // CHECK: /* // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. // CHECK-NEXT: */ - // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); cuCheckError(cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice)); // CHECK: /* // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. // CHECK-NEXT: */ - // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); cuCheckError(cuMemAdvise(devicePtr, count, (CUmem_advise)1, cudevice)); // CHECK: /* // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. // CHECK-NEXT: */ - // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); cuCheckError(cuMemAdvise(devicePtr, count, CUmem_advise(1), cudevice)); // CHECK: /* // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. // CHECK-NEXT: */ - // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); cuCheckError(cuMemAdvise(devicePtr, count, static_cast(1), cudevice)); // CHECK: /* // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. // CHECK-NEXT: */ - // CHECK-NEXT: cu_err = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0)); + // CHECK-NEXT: cu_err = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0)); cu_err = cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice); // CHECK: /* // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. // CHECK-NEXT: */ - // CHECK-NEXT: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0); + // CHECK-NEXT: syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0); cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice); // CHECK: /* @@ -240,21 +240,21 @@ int main(){ cuMemPrefetchAsync (devPtr, 100, cudevice, stream); // CHECK: (*&stream)->prefetch(devPtr, 100); cuMemPrefetchAsync (devPtr, 100, cudevice, *&stream); - // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, NULL); - // CHECK: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100); + // CHECK: syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100); cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread); - // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault); - // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy); - // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread); - // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); cuCheckError(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault)); - // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); cuCheckError(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy)); - // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); cuCheckError(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread)); // CHECK: syclcompat::experimental::memcpy_parameter cpy2; diff --git a/clang/test/dpct/usm-syclcompat.cu b/clang/test/dpct/usm-syclcompat.cu index 51eb05385680..9ad60895f57d 100644 --- a/clang/test/dpct/usm-syclcompat.cu +++ b/clang/test/dpct/usm-syclcompat.cu @@ -798,15 +798,15 @@ void foo3() { MY_SAFE_CALL(cudaMemset3DAsync(p_A, 0xf, e, cudaStreamPerThread)); - // CHECK: syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100); - // CHECK: syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100); - // CHECK: syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100); - // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100)); - // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100)); - // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100)); - // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100))); - // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100))); - // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(deviceID).default_queue()->prefetch(d_A,100))); + // CHECK: syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100); + // CHECK: syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100); + // CHECK: syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100))); cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamDefault); cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamLegacy); cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamPerThread); @@ -820,15 +820,15 @@ void foo3() { CUdevice cudevice = 0; // CHECK: syclcompat::device_ptr devPtr; CUdeviceptr devPtr; - // CHECK: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100); - // CHECK: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100); - // CHECK: syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100); - // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); - // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); - // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100)); - // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); - // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); - // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::dev_mgr::instance().get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + // CHECK: syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100); + // CHECK: syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100); + // CHECK: syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault); cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy); cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread); From a2f1ac861c7ddce49ae706072809cf241007e073 Mon Sep 17 00:00:00 2001 From: Ziran Zhang Date: Thu, 29 Aug 2024 17:50:36 +0800 Subject: [PATCH 5/5] Fix lit build errors --- clang/test/dpct/driver-mem-syclcompat.cu | 2 +- clang/test/dpct/usm-syclcompat.cu | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/clang/test/dpct/driver-mem-syclcompat.cu b/clang/test/dpct/driver-mem-syclcompat.cu index f02293366697..7a5860555f4d 100644 --- a/clang/test/dpct/driver-mem-syclcompat.cu +++ b/clang/test/dpct/driver-mem-syclcompat.cu @@ -1,6 +1,6 @@ // RUN: dpct --format-range=none -use-syclcompat -out-root %T/driver-mem-syclcompat %s --cuda-include-path="%cuda-path/include" // RUN: FileCheck --match-full-lines --input-file %T/driver-mem-syclcompat/driver-mem-syclcompat.dp.cpp %s -// RUN: %if build_lit %{icpx -c -fsycl %T/driver-mem-syclcompat/driver-mem-syclcompat.dp.cpp -o %T/driver-mem/driver-mem-syclcompat.dp.o %} +// RUN: %if build_lit %{icpx -c -fsycl -DBUILD_TEST %T/driver-mem-syclcompat/driver-mem-syclcompat.dp.cpp -o %T/driver-mem/driver-mem-syclcompat.dp.o %} #include #include diff --git a/clang/test/dpct/usm-syclcompat.cu b/clang/test/dpct/usm-syclcompat.cu index 9ad60895f57d..22b4b0a7e562 100644 --- a/clang/test/dpct/usm-syclcompat.cu +++ b/clang/test/dpct/usm-syclcompat.cu @@ -1,4 +1,3 @@ -// FIXME // UNSUPPORTED: system-windows // RUN: dpct --format-range=none --use-syclcompat --usm-level=restricted -out-root %T/usm-syclcompat %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only // RUN: FileCheck --match-full-lines --input-file %T/usm-syclcompat/usm-syclcompat.dp.cpp %s @@ -32,7 +31,7 @@ void foo() { cudaPitchedPtr p_A; cudaExtent e; - cudaMemcpy3DParms parms; + cudaMemcpy3DParms parms, *parms_pointer; cudaStream_t stream; /// malloc @@ -185,7 +184,7 @@ void foo() { // CHECK: syclcompat::experimental::memcpy(parms); cudaMemcpy3D(&parms); - // CHECK: syclcompat::experimental::memcpy(*(parms_pointer)); + // CHECK: syclcompat::experimental::memcpy(*parms_pointer); cudaMemcpy3D(parms_pointer); // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size);