diff --git a/clang/lib/DPCT/APINamesMemory.inc b/clang/lib/DPCT/APINamesMemory.inc index b8d0def6cd20..ed5576aee733 100644 --- a/clang/lib/DPCT/APINamesMemory.inc +++ b/clang/lib/DPCT/APINamesMemory.inc @@ -247,11 +247,13 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "cuMemcpyDtoD_v2", CALL(MapNames::getDpctNamespace() + "dpct_memcpy", ARG(0), ARG(1), ARG(2), ARG(MapNames::getDpctNamespace() + "automatic")))))) -ASSIGNABLE_FACTORY( - FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext, +ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( + HelperFeatureEnum::device_ext, ASSIGN_FACTORY_ENTRY( "cuMemAllocPitch_v2", DEREF(makeDerefArgCreatorWithCall(0)), - CAST(getDerefedType(0), CALL(MapNames::getDpctNamespace() + "dpct_malloc", DEREF(makeCallArgCreatorWithCall(1)), ARG(2), ARG(3)))))) + CAST(getDerefedType(0), + CALL(getMemoryHelperFunctionName("malloc"), + DEREF(makeCallArgCreatorWithCall(1)), ARG(2), ARG(3)))))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( makeCheckNot(CheckArgIsDefaultCudaStream(3)), @@ -264,7 +266,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "get_device", ARG(2)), false, DpctGlobalInfo::getDeviceQueueName()), - false, "prefetch", ARG(0), ARG(1))))) + DpctGlobalInfo::useSYCLCompat(), "prefetch", ARG(0), ARG(1))))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( checkArgSpelling(3, "CU_DEVICE_CPU"), @@ -279,7 +281,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "cpu_device"), false, DpctGlobalInfo::getDeviceQueueName()), - false, "mem_advise", ARG(0), ARG(1), ARG("0"))), + DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG("0"))), Diagnostics::DEFAULT_MEM_ADVICE, ARG(" and was set to 0")), FEATURE_REQUEST_FACTORY( @@ -288,7 +290,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "cuMemAdvise", MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "cpu_device"), false, DpctGlobalInfo::getDeviceQueueName()), - false, "mem_advise", ARG(0), ARG(1), ARG(2)))), + DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG(2)))), CONDITIONAL_FACTORY_ENTRY( checkIsArgIntegerLiteral(2), @@ -302,7 +304,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( "get_device", ARG(3)), false, DpctGlobalInfo::getDeviceQueueName()), - false, "mem_advise", ARG(0), ARG(1), ARG("0"))), + DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG("0"))), Diagnostics::DEFAULT_MEM_ADVICE, ARG(" and was set to 0")), FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, @@ -311,7 +313,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( MEMBER_CALL(CALL(MapNames::getDpctNamespace() + "get_device", ARG(3)), false, DpctGlobalInfo::getDeviceQueueName()), - false, "mem_advise", ARG(0), ARG(1), ARG(2)))))) + DpctGlobalInfo::useSYCLCompat(), "mem_advise", ARG(0), ARG(1), ARG(2)))))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( checkIsUSM(), @@ -426,16 +428,16 @@ ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( CheckDerefedTypeBeforeCast(0, "void *"), ASSIGN_FACTORY_ENTRY( "cudaMallocPitch", DEREF(makeDerefArgCreatorWithCall(0)), - CALL(MapNames::getDpctNamespace() + "dpct_malloc", + CALL(getMemoryHelperFunctionName("malloc"), DEREF(makeCallArgCreatorWithCall(1)), - makeCallArgCreatorWithCall(2), makeDerefArgCreatorWithCall(3))), + makeCallArgCreatorWithCall(2), + makeDerefArgCreatorWithCall(3))), ASSIGN_FACTORY_ENTRY( "cudaMallocPitch", DEREF(makeDerefArgCreatorWithCall(0)), - CAST(getDerefedType(0), - CALL(MapNames::getDpctNamespace() + "dpct_malloc", - DEREF(makeCallArgCreatorWithCall(1)), - makeCallArgCreatorWithCall(2), - makeCallArgCreatorWithCall(3))))))) + CAST(getDerefedType(0), CALL(getMemoryHelperFunctionName("malloc"), + DEREF(makeCallArgCreatorWithCall(1)), + makeCallArgCreatorWithCall(2), + makeCallArgCreatorWithCall(3))))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, @@ -621,78 +623,78 @@ ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD8_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset", + CALL(getMemoryHelperFunctionName("memset"), ARG(0), ARG(1), ARG(2))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD16_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset_d16", + CALL(getMemoryHelperFunctionName("memset_d16"), ARG(0), ARG(1), ARG(2))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD32_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset_d32", + CALL(getMemoryHelperFunctionName("memset_d32"), ARG(0), ARG(1), ARG(2))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD8Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset", + CALL(getMemoryHelperFunctionName("memset_async"), ARG(0), ARG(1), ARG(2), DEREF(makeCallArgCreatorWithCall(3)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD16Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset_d16", + CALL(getMemoryHelperFunctionName("memset_d16_async"), ARG(0), ARG(1), ARG(2), DEREF(makeCallArgCreatorWithCall(3)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD32Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset_d32", + CALL(getMemoryHelperFunctionName("memset_d32_async"), ARG(0), ARG(1), ARG(2), DEREF(makeCallArgCreatorWithCall(3)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D8_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset", + CALL(getMemoryHelperFunctionName("memset"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D16_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset_d16", + CALL(getMemoryHelperFunctionName("memset_d16"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D32_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memset_d32", + CALL(getMemoryHelperFunctionName("memset_d32"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D8Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset", + CALL(getMemoryHelperFunctionName("memset_async"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4), DEREF(makeCallArgCreatorWithCall(5)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D16Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset_d16", + CALL(getMemoryHelperFunctionName("memset_d16_async"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4), DEREF(makeCallArgCreatorWithCall(5)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("cuMemsetD2D32Async", - CALL(MapNames::getDpctNamespace() + "async_dpct_memset_d32", + CALL(getMemoryHelperFunctionName("memset_d32_async"), ARG(0), ARG(1), ARG(2), ARG(3), ARG(4), DEREF(makeCallArgCreatorWithCall(5)))))) diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index 51746528205c..b62d33f5d29a 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -4503,14 +4503,15 @@ void BLASFunctionCallRule::runRule(const MatchFinder::MatchResult &Result) { " = " + MapNames::getClNamespace() + "malloc_shared(" + "1, " + DefaultQueue + ");" + getNL() + IndentStr; - SuffixInsertStr = SuffixInsertStr + getNL() + IndentStr + "int " + - ResultTempHost + " = (int)*" + ResultTempPtr + - ";" + getNL() + IndentStr + - MapNames::getDpctNamespace() + "dpct_memcpy(" + - ExprAnalysis::ref(CE->getArg(i)) + ", &" + - ResultTempHost + ", sizeof(int));" + getNL() + - IndentStr + MapNames::getClNamespace() + "free(" + - ResultTempPtr + ", " + DefaultQueue + ");"; + SuffixInsertStr = + SuffixInsertStr + getNL() + IndentStr + "int " + + ResultTempHost + " = (int)*" + ResultTempPtr + ";" + getNL() + + IndentStr + + MemoryMigrationRule::getMemoryHelperFunctionName("memcpy") + + "(" + ExprAnalysis::ref(CE->getArg(i)) + ", &" + + ResultTempHost + ", sizeof(int));" + getNL() + IndentStr + + MapNames::getClNamespace() + "free(" + ResultTempPtr + ", " + + DefaultQueue + ");"; CurrentArgumentRepl = ResultTempPtr; } else { CurrentArgumentRepl = ExprAnalysis::ref(CE->getArg(i)); @@ -4640,14 +4641,15 @@ void BLASFunctionCallRule::runRule(const MatchFinder::MatchResult &Result) { " = " + MapNames::getClNamespace() + "malloc_shared(" + "1, " + DefaultQueue + ");" + getNL() + IndentStr; - SuffixInsertStr = SuffixInsertStr + getNL() + IndentStr + "int " + - ResultTempHost + " = (int)*" + ResultTempPtr + - ";" + getNL() + IndentStr + - MapNames::getDpctNamespace() + "dpct_memcpy(" + - ExprAnalysis::ref(CE->getArg(i)) + ", &" + - ResultTempHost + ", sizeof(int));" + getNL() + - IndentStr + MapNames::getClNamespace() + "free(" + - ResultTempPtr + ", " + DefaultQueue + ");"; + SuffixInsertStr = + SuffixInsertStr + getNL() + IndentStr + "int " + + ResultTempHost + " = (int)*" + ResultTempPtr + ";" + getNL() + + IndentStr + + MemoryMigrationRule::getMemoryHelperFunctionName("memcpy") + + "(" + ExprAnalysis::ref(CE->getArg(i)) + ", &" + + ResultTempHost + ", sizeof(int));" + getNL() + IndentStr + + MapNames::getClNamespace() + "free(" + ResultTempPtr + ", " + + DefaultQueue + ");"; CurrentArgumentRepl = ResultTempPtr; } else if (ReplInfo.BufferTypeInfo[IndexTemp] == "std::complex" || @@ -5704,7 +5706,7 @@ void SOLVERFunctionCallRule::runRule(const MatchFinder::MatchResult &Result) { if (HasDeviceAttr) { report(CE->getBeginLoc(), Diagnostics::FUNCTION_CALL_IN_DEVICE, false, MapNames::ITFName.at(FuncName), - MapNames::getDpctNamespace() + "dpct_memcpy"); + MemoryMigrationRule::getMemoryHelperFunctionName("memcpy")); return; } @@ -10162,7 +10164,7 @@ void MemoryMigrationRule::mallocMigration( requestFeature(HelperFeatureEnum::device_ext); emplaceTransformation(new InsertBeforeStmt(C, OS.str())); emplaceTransformation( - new ReplaceCalleeName(C, MapNames::getDpctNamespace() + "dpct_malloc")); + new ReplaceCalleeName(C, MemoryMigrationRule::getMemoryHelperFunctionName("malloc"))); emplaceTransformation(removeArg(C, 0, *Result.SourceManager)); std::ostringstream OS2; printDerefOp(OS2, C->getArg(1)); @@ -10248,6 +10250,8 @@ void MemoryMigrationRule::memcpyMigration( // Detect if there is Async in the func name and crop the async substr std::string NameRef = Name; bool IsAsync = false; + // Whether in experimental namespace in syclcompat. + bool IsExperimentalInSYCLCompat = false; size_t AsyncLoc = NameRef.find("Async"); if (AsyncLoc != std::string::npos) { IsAsync = true; @@ -10266,6 +10270,7 @@ void MemoryMigrationRule::memcpyMigration( llvm::raw_string_ostream OS(Replacement); DerefExpr(C->getArg(0), C).print(OS); emplaceTransformation(new ReplaceStmt(C->getArg(0), Replacement)); + IsExperimentalInSYCLCompat = true; } else if (!NameRef.compare("cudaMemcpy") || NameRef.rfind("cuMemcpyDtoH", 0) == 0) { if (!NameRef.compare("cudaMemcpy")) { @@ -10343,10 +10348,12 @@ void MemoryMigrationRule::memcpyMigration( if (ReplaceStr.empty()) { if (IsAsync) { - ReplaceStr = MapNames::getDpctNamespace() + "async_dpct_memcpy"; + ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName( + "memcpy_async", IsExperimentalInSYCLCompat); requestFeature(HelperFeatureEnum::device_ext); } else { - ReplaceStr = MapNames::getDpctNamespace() + "dpct_memcpy"; + ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName( + "memcpy", IsExperimentalInSYCLCompat); requestFeature(HelperFeatureEnum::device_ext); } } @@ -10365,6 +10372,15 @@ void MemoryMigrationRule::memcpyMigration( void MemoryMigrationRule::arrayMigration( const ast_matchers::MatchFinder::MatchResult &Result, const CallExpr *C, const UnresolvedLookupExpr *ULExpr, bool IsAssigned) { + if (DpctGlobalInfo::useSYCLCompat()) { + ExprAnalysis EA; + if (ULExpr) + EA.analyze(ULExpr); + else + EA.analyze(C); + emplaceTransformation(EA.getReplacement()); + return; + } std::string Name; if (ULExpr) { Name = ULExpr->getName().getAsString(); @@ -10757,10 +10773,10 @@ void MemoryMigrationRule::memsetMigration( bool IsAsync = NameRef.ends_with("Async"); if (IsAsync) { NameRef = NameRef.drop_back(5 /* len of "Async" */); - ReplaceStr = MapNames::getDpctNamespace() + "async_dpct_memset"; + ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName("memset_async"); requestFeature(HelperFeatureEnum::device_ext); } else { - ReplaceStr = MapNames::getDpctNamespace() + "dpct_memset"; + ReplaceStr = MemoryMigrationRule::getMemoryHelperFunctionName("memset"); requestFeature(HelperFeatureEnum::device_ext); } @@ -11456,6 +11472,10 @@ void MemoryMigrationRule::aggregate3DVectorClassCtor( } void MemoryMigrationRule::handleDirection(const CallExpr *C, unsigned i) { + if (DpctGlobalInfo::useSYCLCompat()) { + emplaceTransformation(removeArg(C, i, DpctGlobalInfo::getSourceManager())); + return; + } if (C->getNumArgs() > i && !C->getArg(i)->isDefaultArgument()) { if (auto DRE = dyn_cast(C->getArg(i))) { if (auto Enum = dyn_cast(DRE->getDecl())) { @@ -13112,11 +13132,11 @@ void TextureRule::registerMatcher(MatchFinder &MF) { "cudaTextureDesc", "cudaResourceDesc", "cudaResourceType", "cudaTextureAddressMode", "cudaTextureFilterMode", "cudaArray", "cudaArray_t", "CUarray_st", "CUarray", "CUarray_format", - "CUarray_format_enum", "CUdeviceptr", "CUresourcetype", - "CUresourcetype_enum", "CUaddress_mode", "CUaddress_mode_enum", - "CUfilter_mode", "CUfilter_mode_enum", "CUDA_RESOURCE_DESC", - "CUDA_TEXTURE_DESC", "CUtexref", "textureReference", - "cudaMipmappedArray", "cudaMipmappedArray_t")))))) + "CUarray_format_enum", "CUresourcetype", "CUresourcetype_enum", + "CUaddress_mode", "CUaddress_mode_enum", "CUfilter_mode", + "CUfilter_mode_enum", "CUDA_RESOURCE_DESC", "CUDA_TEXTURE_DESC", + "CUtexref", "textureReference", "cudaMipmappedArray", + "cudaMipmappedArray_t")))))) .bind("texType"), this); diff --git a/clang/lib/DPCT/ASTTraversal.h b/clang/lib/DPCT/ASTTraversal.h index 6449a94bc301..d2678b3eef1f 100644 --- a/clang/lib/DPCT/ASTTraversal.h +++ b/clang/lib/DPCT/ASTTraversal.h @@ -1344,6 +1344,16 @@ class MemoryMigrationRule : public NamedMigrationRule { void registerMatcher(ast_matchers::MatchFinder &MF) override; void runRule(const ast_matchers::MatchFinder::MatchResult &Result); + /// Get helper function name with namespace which has 'dpct_' in dpct helper + /// functions and w/o in syclcompat. + /// If has "_async" suffix, the name in dpct helper function will have + /// 'async_' prefix and remove the suffix. + /// If `ExperimentalInSYCLCompat` is true, will add `experimental` namespace + /// in syclcompat. + static std::string + getMemoryHelperFunctionName(StringRef RawName, + bool ExperimentalInSYCLCompat = false); + private: void mallocMigration(const ast_matchers::MatchFinder::MatchResult &Result, const CallExpr *C, diff --git a/clang/lib/DPCT/CallExprRewriterMemory.cpp b/clang/lib/DPCT/CallExprRewriterMemory.cpp index 4bcd87a72fe6..b46025163a45 100644 --- a/clang/lib/DPCT/CallExprRewriterMemory.cpp +++ b/clang/lib/DPCT/CallExprRewriterMemory.cpp @@ -12,6 +12,38 @@ namespace clang { namespace dpct { +/// Get helper function name with namespace which has 'dpct_' in dpct helper +/// functions and w/o in syclcompat. +/// If has "_async" suffix, the name in dpct helper function will have 'async_' +/// prefix and remove the suffix. +/// If `ExperimentalInSYCLCompat` is true, will add `experimental` namespace +/// in syclcompat. +std::string getMemoryHelperFunctionName(StringRef RawName, + bool ExperimentalInSYCLCompat = false) { + const static std::string AsyncSuffix = "_async"; + const static std::string AsyncPrefix = "async_"; + + std::string Result; + llvm::raw_string_ostream OS(Result); + OS << MapNames::getDpctNamespace(); + if (!DpctGlobalInfo::useSYCLCompat()) { + if (RawName.ends_with(AsyncSuffix)) { + RawName = RawName.drop_back(AsyncSuffix.length()); + OS << AsyncPrefix; + } + OS << "dpct_"; + } else if (ExperimentalInSYCLCompat) { + OS << "experimental::"; + } + OS << RawName; + return Result; +} + +std::string MemoryMigrationRule::getMemoryHelperFunctionName( + StringRef Name, bool ExperimentalInSYCLCompat) { + return dpct::getMemoryHelperFunctionName(Name, ExperimentalInSYCLCompat); +} + // clang-format off void CallExprRewriterFactoryBase::initRewriterMapMemory() { RewriterMap->merge( diff --git a/clang/lib/DPCT/MapNames.cpp b/clang/lib/DPCT/MapNames.cpp index eb149febeed5..5a5989ce5ce3 100644 --- a/clang/lib/DPCT/MapNames.cpp +++ b/clang/lib/DPCT/MapNames.cpp @@ -329,7 +329,8 @@ void MapNames::setExplicitNamespaceMap( std::make_shared(getDpctNamespace() + "kernel_function", HelperFeatureEnum::device_ext)}, {"CUpointer_attribute", - std::make_shared(getDpctNamespace() + "pointer_attributes::type")}, + std::make_shared(getDpctNamespace() + + "pointer_attributes::type")}, {"cudaPointerAttributes", std::make_shared(getDpctNamespace() + "pointer_attributes", HelperFeatureEnum::device_ext)}, @@ -407,23 +408,25 @@ void MapNames::setExplicitNamespaceMap( {"ushort2", std::make_shared(getClNamespace() + "ushort2")}, {"ushort3", std::make_shared(getClNamespace() + "ushort3")}, {"ushort4", std::make_shared(getClNamespace() + "ushort4")}, - {"cublasHandle_t", std::make_shared( - getLibraryHelperNamespace() + "blas::descriptor_ptr", - HelperFeatureEnum::device_ext)}, + {"cublasHandle_t", + std::make_shared(getLibraryHelperNamespace() + + "blas::descriptor_ptr", + HelperFeatureEnum::device_ext)}, {"cublasStatus_t", std::make_shared("int")}, {"cublasStatus", std::make_shared("int")}, {"cublasGemmAlgo_t", std::make_shared("int")}, - {"cudaDataType_t", - std::make_shared(getLibraryHelperNamespace() + "library_data_t", - HelperFeatureEnum::device_ext)}, - {"cudaDataType", - std::make_shared(getLibraryHelperNamespace() + "library_data_t", - HelperFeatureEnum::device_ext)}, - {"cublasDataType_t", - std::make_shared(getLibraryHelperNamespace() + "library_data_t", - HelperFeatureEnum::device_ext)}, + {"cudaDataType_t", std::make_shared( + getLibraryHelperNamespace() + "library_data_t", + HelperFeatureEnum::device_ext)}, + {"cudaDataType", std::make_shared( + getLibraryHelperNamespace() + "library_data_t", + HelperFeatureEnum::device_ext)}, + {"cublasDataType_t", std::make_shared( + getLibraryHelperNamespace() + "library_data_t", + HelperFeatureEnum::device_ext)}, {"cublasComputeType_t", - std::make_shared(getLibraryHelperNamespace() + "compute_type")}, + std::make_shared(getLibraryHelperNamespace() + + "compute_type")}, {"cuComplex", std::make_shared(getClNamespace() + "float2")}, {"cuFloatComplex", @@ -437,8 +440,8 @@ void MapNames::setExplicitNamespaceMap( std::make_shared("oneapi::mkl::transpose")}, {"cublasPointerMode_t", std::make_shared("int")}, {"cublasAtomicsMode_t", std::make_shared("int")}, - {"cublasMath_t", - std::make_shared(getLibraryHelperNamespace() + "blas::math_mode")}, + {"cublasMath_t", std::make_shared( + getLibraryHelperNamespace() + "blas::math_mode")}, {"cusparsePointerMode_t", std::make_shared("int")}, {"cusparseFillMode_t", std::make_shared("oneapi::mkl::uplo")}, @@ -454,7 +457,8 @@ void MapNames::setExplicitNamespaceMap( std::make_shared("oneapi::mkl::transpose")}, {"cusparseAlgMode_t", std::make_shared("int")}, {"cusparseSolveAnalysisInfo_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::optimize_info>", HelperFeatureEnum::device_ext)}, {"thrust::device_ptr", @@ -566,25 +570,31 @@ void MapNames::setExplicitNamespaceMap( {"cudaTextureFilterMode", std::make_shared(getClNamespace() + "filtering_mode")}, {"curandGenerator_t", - std::make_shared(getLibraryHelperNamespace() + "rng::host_rng_ptr", + std::make_shared(getLibraryHelperNamespace() + + "rng::host_rng_ptr", + HelperFeatureEnum::device_ext)}, + {"curandRngType_t", + std::make_shared(getLibraryHelperNamespace() + + "rng::random_engine_type", + HelperFeatureEnum::device_ext)}, + {"curandRngType", + std::make_shared(getLibraryHelperNamespace() + + "rng::random_engine_type", HelperFeatureEnum::device_ext)}, - {"curandRngType_t", std::make_shared( - getLibraryHelperNamespace() + "rng::random_engine_type", - HelperFeatureEnum::device_ext)}, - {"curandRngType", std::make_shared( - getLibraryHelperNamespace() + "rng::random_engine_type", - HelperFeatureEnum::device_ext)}, {"curandStatus_t", std::make_shared("int")}, {"curandStatus", std::make_shared("int")}, {"curandOrdering_t", - std::make_shared(getLibraryHelperNamespace() + "rng::random_mode")}, + std::make_shared(getLibraryHelperNamespace() + + "rng::random_mode")}, {"cusparseStatus_t", std::make_shared("int")}, {"cusparseMatDescr_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::matrix_info>", HelperFeatureEnum::device_ext)}, - {"cusparseHandle_t", std::make_shared( - getLibraryHelperNamespace() + "sparse::descriptor_ptr")}, + {"cusparseHandle_t", + std::make_shared(getLibraryHelperNamespace() + + "sparse::descriptor_ptr")}, {"cudaMemoryAdvise", std::make_shared("int")}, {"cudaStreamCaptureStatus", std::make_shared( @@ -603,7 +613,10 @@ void MapNames::setExplicitNamespaceMap( std::make_shared(getDpctNamespace() + "pitched_data", HelperFeatureEnum::device_ext)}, {"cudaMemcpyKind", - std::make_shared(getDpctNamespace() + "memcpy_direction")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_direction")}, {"CUDA_ARRAY3D_DESCRIPTOR", std::make_shared( DpctGlobalInfo::useExtBindlessImages() @@ -617,15 +630,30 @@ void MapNames::setExplicitNamespaceMap( "ext::oneapi::experimental::image_descriptor" : getDpctNamespace() + "image_matrix_desc")}, {"cudaMemcpy3DParms", - std::make_shared(getDpctNamespace() + "memcpy_parameter")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_parameter")}, {"CUDA_MEMCPY3D", - std::make_shared(getDpctNamespace() + "memcpy_parameter")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_parameter")}, {"cudaMemcpy3DPeerParms", - std::make_shared(getDpctNamespace() + "memcpy_parameter")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_parameter")}, {"CUDA_MEMCPY3D_PEER", - std::make_shared(getDpctNamespace() + "memcpy_parameter")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_parameter")}, {"CUDA_MEMCPY2D", - std::make_shared(getDpctNamespace() + "memcpy_parameter")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "memcpy_parameter")}, {"cudaComputeMode", std::make_shared("int")}, {"cudaSharedMemConfig", std::make_shared("int")}, {"cufftReal", std::make_shared("float")}, @@ -636,12 +664,12 @@ void MapNames::setExplicitNamespaceMap( std::make_shared(getClNamespace() + "double2")}, {"cufftResult_t", std::make_shared("int")}, {"cufftResult", std::make_shared("int")}, - {"cufftType_t", - std::make_shared(getLibraryHelperNamespace() + "fft::fft_type", - HelperFeatureEnum::device_ext)}, - {"cufftType", - std::make_shared(getLibraryHelperNamespace() + "fft::fft_type", - HelperFeatureEnum::device_ext)}, + {"cufftType_t", std::make_shared( + getLibraryHelperNamespace() + "fft::fft_type", + HelperFeatureEnum::device_ext)}, + {"cufftType", std::make_shared( + getLibraryHelperNamespace() + "fft::fft_type", + HelperFeatureEnum::device_ext)}, {"cufftHandle", std::make_shared( getLibraryHelperNamespace() + "fft::fft_engine_ptr", HelperFeatureEnum::device_ext)}, @@ -709,17 +737,18 @@ void MapNames::setExplicitNamespaceMap( getClNamespace() + "marray<" + getClNamespace() + "ext::oneapi::bfloat16, 2>")}, {"libraryPropertyType_t", - std::make_shared(getLibraryHelperNamespace() + "version_field", - HelperFeatureEnum::device_ext)}, - {"libraryPropertyType", - std::make_shared(getLibraryHelperNamespace() + "version_field", + std::make_shared(getLibraryHelperNamespace() + + "version_field", HelperFeatureEnum::device_ext)}, + {"libraryPropertyType", std::make_shared( + getLibraryHelperNamespace() + "version_field", + HelperFeatureEnum::device_ext)}, {"ncclUniqueId", std::make_shared("oneapi::ccl::kvs::address_type", HelperFeatureEnum::device_ext)}, - {"ncclComm_t", - std::make_shared(getLibraryHelperNamespace() + "ccl::comm_ptr", - HelperFeatureEnum::device_ext)}, + {"ncclComm_t", std::make_shared( + getLibraryHelperNamespace() + "ccl::comm_ptr", + HelperFeatureEnum::device_ext)}, {"ncclRedOp_t", std::make_shared("oneapi::ccl::reduction")}, {"ncclDataType_t", std::make_shared("oneapi::ccl::datatype")}, @@ -733,22 +762,28 @@ void MapNames::setExplicitNamespaceMap( {"CUuuid", std::make_shared("std::array")}, {"cusparseIndexType_t", - std::make_shared(getLibraryHelperNamespace() + "library_data_t")}, - {"cusparseFormat_t", std::make_shared( - getLibraryHelperNamespace() + "sparse::matrix_format")}, + std::make_shared(getLibraryHelperNamespace() + + "library_data_t")}, + {"cusparseFormat_t", + std::make_shared(getLibraryHelperNamespace() + + "sparse::matrix_format")}, {"cusparseDnMatDescr_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::dense_matrix_desc>")}, {"cusparseConstDnMatDescr_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::dense_matrix_desc>")}, {"cusparseOrder_t", std::make_shared("oneapi::mkl::layout")}, {"cusparseDnVecDescr_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::dense_vector_desc>")}, {"cusparseConstDnVecDescr_t", - std::make_shared("std::shared_ptr<" + getLibraryHelperNamespace() + + std::make_shared("std::shared_ptr<" + + getLibraryHelperNamespace() + "sparse::dense_vector_desc>")}, {"cusparseSpMatDescr_t", std::make_shared(getLibraryHelperNamespace() + @@ -773,21 +808,22 @@ void MapNames::setExplicitNamespaceMap( {"cudaLaunchAttributeValue", std::make_shared("int")}, {"cusparseSpSMDescr_t", std::make_shared("int")}, {"cusparseSpSMAlg_t", std::make_shared("int")}, - {"cublasLtHandle_t", - std::make_shared( - getLibraryHelperNamespace() + "blas_gemm::experimental::descriptor_ptr")}, - {"cublasLtMatmulDesc_t", - std::make_shared( - getLibraryHelperNamespace() + "blas_gemm::experimental::matmul_desc_ptr")}, + {"cublasLtHandle_t", std::make_shared( + getLibraryHelperNamespace() + + "blas_gemm::experimental::descriptor_ptr")}, + {"cublasLtMatmulDesc_t", std::make_shared( + getLibraryHelperNamespace() + + "blas_gemm::experimental::matmul_desc_ptr")}, {"cublasLtOrder_t", std::make_shared(getLibraryHelperNamespace() + "blas_gemm::experimental::order_t")}, - {"cublasLtPointerMode_t", - std::make_shared( - getLibraryHelperNamespace() + "blas_gemm::experimental::pointer_mode_t")}, + {"cublasLtPointerMode_t", std::make_shared( + getLibraryHelperNamespace() + + "blas_gemm::experimental::pointer_mode_t")}, {"cublasLtMatrixLayout_t", std::make_shared( - getLibraryHelperNamespace() + "blas_gemm::experimental::matrix_layout_ptr")}, + getLibraryHelperNamespace() + + "blas_gemm::experimental::matrix_layout_ptr")}, {"cublasLtMatrixLayoutAttribute_t", std::make_shared( getLibraryHelperNamespace() + @@ -805,7 +841,8 @@ void MapNames::setExplicitNamespaceMap( std::make_shared("int")}, {"cublasLtMatrixTransformDesc_t", std::make_shared( - getLibraryHelperNamespace() + "blas_gemm::experimental::transform_desc_ptr")}, + getLibraryHelperNamespace() + + "blas_gemm::experimental::transform_desc_ptr")}, {"cudaGraphicsMapFlags", std::make_shared("int")}, {"cudaGraphicsRegisterFlags", std::make_shared("int")}, // ... @@ -1281,15 +1318,30 @@ void MapNames::setExplicitNamespaceMap( HelperFeatureEnum::device_ext)}, // enum Memcpy Kind {"cudaMemcpyHostToHost", - std::make_shared(getDpctNamespace() + "host_to_host")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "host_to_host")}, {"cudaMemcpyHostToDevice", - std::make_shared(getDpctNamespace() + "host_to_device")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "host_to_device")}, {"cudaMemcpyDeviceToHost", - std::make_shared(getDpctNamespace() + "device_to_host")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "device_to_host")}, {"cudaMemcpyDeviceToDevice", - std::make_shared(getDpctNamespace() + "device_to_device")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "device_to_device")}, {"cudaMemcpyDefault", - std::make_shared(getDpctNamespace() + "automatic")}, + std::make_shared( + getDpctNamespace() + + (DpctGlobalInfo::useSYCLCompat() ? "experimental::" : "") + + "automatic")}, // enum cudaMemory Type {"cudaMemoryTypeHost", std::make_shared(getClNamespace() + "usm::alloc::host", @@ -1448,11 +1500,11 @@ void MapNames::setExplicitNamespaceMap( std::make_shared(getDpctNamespace() + "pointer_attributes::type::memory_type")}, {"CU_POINTER_ATTRIBUTE_DEVICE_POINTER", - std::make_shared(getDpctNamespace() + - "pointer_attributes::type::device_pointer")}, + std::make_shared( + getDpctNamespace() + "pointer_attributes::type::device_pointer")}, {"CU_POINTER_ATTRIBUTE_HOST_POINTER", - std::make_shared(getDpctNamespace() + - "pointer_attributes::type::host_pointer")}, + std::make_shared( + getDpctNamespace() + "pointer_attributes::type::host_pointer")}, {"CU_POINTER_ATTRIBUTE_P2P_TOKENS", std::make_shared(getDpctNamespace() + "pointer_attributes::type::unsupported")}, @@ -1494,7 +1546,7 @@ void MapNames::setExplicitNamespaceMap( {"CU_MEMORYTYPE_UNIFIED", std::make_shared(getClNamespace() + "usm::alloc::shared", HelperFeatureEnum::device_ext)}, - + // enum CUlimit {"CU_LIMIT_PRINTF_FIFO_SIZE", std::make_shared("INT_MAX")}, @@ -1556,75 +1608,99 @@ void MapNames::setExplicitNamespaceMap( "image_data_type::pitch", HelperFeatureEnum::device_ext)}, // enum libraryPropertyType_t - {"MAJOR_VERSION", std::make_shared( - getLibraryHelperNamespace() + "version_field::major", - HelperFeatureEnum::device_ext)}, - {"MINOR_VERSION", std::make_shared( - getLibraryHelperNamespace() + "version_field::update", - HelperFeatureEnum::device_ext)}, + {"MAJOR_VERSION", + std::make_shared(getLibraryHelperNamespace() + + "version_field::major", + HelperFeatureEnum::device_ext)}, + {"MINOR_VERSION", + std::make_shared(getLibraryHelperNamespace() + + "version_field::update", + HelperFeatureEnum::device_ext)}, {"PATCH_LEVEL", std::make_shared( getLibraryHelperNamespace() + "version_field::patch", HelperFeatureEnum::device_ext)}, // enum cudaDataType_t - {"CUDA_R_16F", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_half")}, - {"CUDA_C_16F", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_half")}, + {"CUDA_R_16F", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_half")}, + {"CUDA_C_16F", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_half")}, {"CUDA_R_16BF", std::make_shared(getLibraryHelperNamespace() + "library_data_t::real_bfloat16")}, {"CUDA_C_16BF", std::make_shared(getLibraryHelperNamespace() + "library_data_t::complex_bfloat16")}, - {"CUDA_R_32F", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_float")}, - {"CUDA_C_32F", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_float")}, - {"CUDA_R_64F", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_double")}, + {"CUDA_R_32F", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_float")}, + {"CUDA_C_32F", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_float")}, + {"CUDA_R_64F", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_double")}, {"CUDA_C_64F", std::make_shared(getLibraryHelperNamespace() + "library_data_t::complex_double")}, - {"CUDA_R_4I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_int4")}, - {"CUDA_C_4I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_int4")}, - {"CUDA_R_4U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_uint4")}, - {"CUDA_C_4U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_uint4")}, - {"CUDA_R_8I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_int8")}, - {"CUDA_C_8I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_int8")}, - {"CUDA_R_8U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_uint8")}, - {"CUDA_C_8U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_uint8")}, - {"CUDA_R_16I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_int16")}, - {"CUDA_C_16I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_int16")}, - {"CUDA_R_16U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_uint16")}, + {"CUDA_R_4I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_int4")}, + {"CUDA_C_4I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_int4")}, + {"CUDA_R_4U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_uint4")}, + {"CUDA_C_4U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_uint4")}, + {"CUDA_R_8I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_int8")}, + {"CUDA_C_8I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_int8")}, + {"CUDA_R_8U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_uint8")}, + {"CUDA_C_8U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_uint8")}, + {"CUDA_R_16I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_int16")}, + {"CUDA_C_16I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_int16")}, + {"CUDA_R_16U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_uint16")}, {"CUDA_C_16U", std::make_shared(getLibraryHelperNamespace() + "library_data_t::complex_uint16")}, - {"CUDA_R_32I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_int32")}, - {"CUDA_C_32I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_int32")}, - {"CUDA_R_32U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_uint32")}, + {"CUDA_R_32I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_int32")}, + {"CUDA_C_32I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_int32")}, + {"CUDA_R_32U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_uint32")}, {"CUDA_C_32U", std::make_shared(getLibraryHelperNamespace() + "library_data_t::complex_uint32")}, - {"CUDA_R_64I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_int64")}, - {"CUDA_C_64I", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::complex_int64")}, - {"CUDA_R_64U", std::make_shared( - getLibraryHelperNamespace() + "library_data_t::real_uint64")}, + {"CUDA_R_64I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_int64")}, + {"CUDA_C_64I", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::complex_int64")}, + {"CUDA_R_64U", + std::make_shared(getLibraryHelperNamespace() + + "library_data_t::real_uint64")}, {"CUDA_C_64U", std::make_shared(getLibraryHelperNamespace() + "library_data_t::complex_uint64")}, @@ -1658,26 +1734,26 @@ void MapNames::setExplicitNamespaceMap( {"cuda::memory_order_seq_cst", std::make_shared(getClNamespace() + "memory_order::seq_cst")}, - {"CUFFT_R2C", - std::make_shared( - getLibraryHelperNamespace() + "fft::fft_type::real_float_to_complex_float", - HelperFeatureEnum::device_ext)}, - {"CUFFT_C2R", - std::make_shared( - getLibraryHelperNamespace() + "fft::fft_type::complex_float_to_real_float", - HelperFeatureEnum::device_ext)}, - {"CUFFT_D2Z", - std::make_shared( - getLibraryHelperNamespace() + "fft::fft_type::real_double_to_complex_double", - HelperFeatureEnum::device_ext)}, - {"CUFFT_Z2D", - std::make_shared( - getLibraryHelperNamespace() + "fft::fft_type::complex_double_to_real_double", - HelperFeatureEnum::device_ext)}, - {"CUFFT_C2C", - std::make_shared( - getLibraryHelperNamespace() + "fft::fft_type::complex_float_to_complex_float", - HelperFeatureEnum::device_ext)}, + {"CUFFT_R2C", std::make_shared( + getLibraryHelperNamespace() + + "fft::fft_type::real_float_to_complex_float", + HelperFeatureEnum::device_ext)}, + {"CUFFT_C2R", std::make_shared( + getLibraryHelperNamespace() + + "fft::fft_type::complex_float_to_real_float", + HelperFeatureEnum::device_ext)}, + {"CUFFT_D2Z", std::make_shared( + getLibraryHelperNamespace() + + "fft::fft_type::real_double_to_complex_double", + HelperFeatureEnum::device_ext)}, + {"CUFFT_Z2D", std::make_shared( + getLibraryHelperNamespace() + + "fft::fft_type::complex_double_to_real_double", + HelperFeatureEnum::device_ext)}, + {"CUFFT_C2C", std::make_shared( + getLibraryHelperNamespace() + + "fft::fft_type::complex_float_to_complex_float", + HelperFeatureEnum::device_ext)}, {"CUFFT_Z2Z", std::make_shared( getLibraryHelperNamespace() + "fft::fft_type::complex_double_to_complex_double", diff --git a/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp b/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp index 53255c983a37..2d5f54e75b3e 100644 --- a/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp +++ b/clang/lib/DPCT/Rewriters/RewriterSYCLcompat.cpp @@ -40,6 +40,27 @@ void initRewriterMapSYCLcompatUnsupport( RewriterMap.insert({ #include "../APINamesGraph.inc" #include "../APINamesTexture.inc" +SYCLCOMPAT_UNSUPPORT("cudaMemcpy2DArrayToArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpy2DFromArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpy2DFromArrayAsync") +SYCLCOMPAT_UNSUPPORT("cudaMemcpy2DToArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpy2DToArrayAsync") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyArrayToArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyToArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyToArrayAsync") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyFromArray") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyFromArrayAsync") +SYCLCOMPAT_UNSUPPORT("cuMemcpyAtoH_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyHtoA_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyAtoHAsync_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyHtoAAsync_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyAtoD_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyDtoA_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyAtoA_v2") +SYCLCOMPAT_UNSUPPORT("cuMemcpyPeer") +SYCLCOMPAT_UNSUPPORT("cuMemcpyPeerAsync") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyPeer") +SYCLCOMPAT_UNSUPPORT("cudaMemcpyPeerAsync") }); } diff --git a/clang/test/dpct/driver-mem-syclcompat.cu b/clang/test/dpct/driver-mem-syclcompat.cu new file mode 100644 index 000000000000..7a5860555f4d --- /dev/null +++ b/clang/test/dpct/driver-mem-syclcompat.cu @@ -0,0 +1,351 @@ +// RUN: dpct --format-range=none -use-syclcompat -out-root %T/driver-mem-syclcompat %s --cuda-include-path="%cuda-path/include" +// RUN: FileCheck --match-full-lines --input-file %T/driver-mem-syclcompat/driver-mem-syclcompat.dp.cpp %s +// RUN: %if build_lit %{icpx -c -fsycl -DBUILD_TEST %T/driver-mem-syclcompat/driver-mem-syclcompat.dp.cpp -o %T/driver-mem/driver-mem-syclcompat.dp.o %} + +#include +#include +#include + +#define CALL(x) x + +void cuCheckError(CUresult err) { +} + +int main(){ + size_t result1, result2; + int size = 32; + size_t count = 32; + float* f_A; + CUresult r; + // CHECK: f_A = (float *)sycl::malloc_host(size, q_ct1); + cuMemHostAlloc((void **)&f_A, size, CU_MEMHOSTALLOC_DEVICEMAP); + + + CUdeviceptr p1; + [&p1]() { + //CHECK:p1 = 0; + p1 = 0; + }(); + + CUdeviceptr p2; + [&]() { + //CHECK:p2 = 0; + p2 = 0; + }(); + + // CHECK: syclcompat::device_ptr f_D = 0; + CUdeviceptr f_D = 0; + // CHECK: syclcompat::device_ptr f_D2 = 0; + CUdeviceptr f_D2 = 0; + // CHECK: int c1, c2; + CUcontext c1, c2; + // CHECK: f_D = (syclcompat::device_ptr)sycl::malloc_device(size, q_ct1); + cuMemAlloc(&f_D, size); + + // CHECK: syclcompat::queue_ptr stream; + CUstream stream; + // CHECK: stream->memcpy(f_D, f_A, size); + cuMemcpyHtoDAsync(f_D, f_A, size, stream); + // CHECK: q_ct1.memcpy(f_D, f_A, size); + cuMemcpyHtoDAsync(f_D, f_A, size, 0); + // CHECK: CALL(q_ct1.memcpy(f_D, f_A, size).wait()); + CALL(cuMemcpyHtoD(f_D, f_A, size)); + + // CHECK: stream->memcpy(f_A, f_D, size); + cuMemcpyDtoHAsync(f_A, f_D, size, stream); + // CHECK: q_ct1.memcpy(f_A, f_D, size); + cuMemcpyDtoHAsync(f_A, f_D, size, 0); + // CHECK: q_ct1.memcpy(f_A, f_D, size).wait(); + cuMemcpyDtoH(f_A, f_D, size); + + // CHECK: stream->memcpy(f_D, f_D2, size); + cuMemcpyDtoDAsync(f_D, f_D2, size, stream); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(stream->memcpy(f_D, f_D2, size)); + r = cuMemcpyDtoDAsync(f_D, f_D2, size, stream); + + // CHECK: q_ct1.memcpy(f_D, f_D2, size); + cuMemcpyDtoDAsync(f_D, f_D2, size, 0); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(f_D, f_D2, size)); + r = cuMemcpyDtoDAsync(f_D, f_D2, size, 0); + + // CHECK: q_ct1.memcpy(f_D, f_D2, size).wait(); + cuMemcpyDtoD(f_D, f_D2, size); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(f_D, f_D2, size).wait()); + r = cuMemcpyDtoD(f_D, f_D2, size); + + // CHECK: q_ct1.memcpy(f_D, f_D2, size).wait(); + cuMemcpy(f_D, f_D2, size); + // CHECK: CALL(q_ct1.memcpy(f_D, f_D2, size).wait()); + CALL(cuMemcpy(f_D, f_D2, size)); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(f_D, f_D2, size).wait()); + r = cuMemcpy(f_D, f_D2, size); + + // CHECK: stream->memcpy(f_D, f_D2, size); + cuMemcpyAsync(f_D, f_D2, size, stream); + // CHECK: CALL(stream->memcpy(f_D, f_D2, size)); + CALL(cuMemcpyAsync(f_D, f_D2, size, stream)); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(stream->memcpy(f_D, f_D2, size)); + r = cuMemcpyAsync(f_D, f_D2, size, stream); + + // CHECK: q_ct1.memcpy(f_D, f_D2, size); + cuMemcpyAsync(f_D, f_D2, size, 0); + // CHECK: CALL(q_ct1.memcpy(f_D, f_D2, size)); + CALL(cuMemcpyAsync(f_D, f_D2, size, 0)); + // CHECK: r = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(f_D, f_D2, size)); + r = cuMemcpyAsync(f_D, f_D2, size, 0); + +#ifndef BUILD_TEST + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cuMemcpyPeer" is not supported with SYCLcompat currently, please adjust the code manually. + cuMemcpyPeer(f_D, c1, f_D2, c2, size); + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cuMemcpyPeerAsync" is not supported with SYCLcompat currently, please adjust the code manually. + cuMemcpyPeerAsync(f_D, c1, f_D2, c2, size, stream); +#endif + + unsigned int v32 = 50000; + unsigned short v16 = 20000; + unsigned char v8 = (unsigned char) 200; + //CHECK: syclcompat::memset_d32(f_D, v32, size); + //CHECK-NEXT: syclcompat::memset_d16(f_D, v16, size * 2); + //CHECK-NEXT: syclcompat::memset(f_D, v8, size * 4); + //CHECK-NEXT: syclcompat::memset_d32_async(f_D, v32, size, *stream); + //CHECK-NEXT: syclcompat::memset_d16_async(f_D, v16, size * 2, *stream); + //CHECK-NEXT: syclcompat::memset_async(f_D, v8, size * 4, *stream); + //CHECK-NEXT: syclcompat::memset_d32(f_D, 1, v32, 4, 6); + //CHECK-NEXT: syclcompat::memset_d16(f_D, 1, v16, 4 * 2, 6); + //CHECK-NEXT: syclcompat::memset(f_D, 1, v8, 4 * 4, 6); + //CHECK-NEXT: syclcompat::memset_d32_async(f_D, 1, v32, 4, 6, *stream); + //CHECK-NEXT: syclcompat::memset_d16_async(f_D, 1, v16, 4 * 2, 6, *stream); + //CHECK-NEXT: syclcompat::memset_async(f_D, 1, v8, 4 * 4, 6, *stream); + cuMemsetD32(f_D, v32, size); + cuMemsetD16(f_D, v16, size * 2); + cuMemsetD8(f_D, v8, size * 4); + cuMemsetD32Async(f_D, v32, size, stream); + cuMemsetD16Async(f_D, v16, size * 2, stream); + cuMemsetD8Async(f_D, v8, size * 4, stream); + cuMemsetD2D32(f_D, 1, v32, 4, 6); + cuMemsetD2D16(f_D, 1, v16, 4 * 2, 6); + cuMemsetD2D8(f_D, 1, v8, 4 * 4, 6); + cuMemsetD2D32Async(f_D, 1, v32, 4, 6, stream); + cuMemsetD2D16Async(f_D, 1, v16, 4 * 2, 6, stream); + cuMemsetD2D8Async(f_D, 1, v8, 4 * 4, 6, stream); + + // CHECK: syclcompat::experimental::memcpy_parameter cpy; + // CHECK-NEXT: cpy.to.pitched.set_data_ptr(f_A); + // CHECK-NEXT: cpy.to.pitched.set_pitch(20); + // CHECK-NEXT: cpy.to.pos[1] = 10; + // CHECK-NEXT: cpy.to.pos[0] = 15; + // CHECK-EMPTY: + // CHECK-NEXT: cpy.from.pitched.set_data_ptr(f_D); + // CHECK-NEXT: cpy.from.pitched.set_pitch(20); + // CHECK-NEXT: cpy.from.pos[1] = 10; + // CHECK-NEXT: cpy.from.pos[0] = 15; + // CHECK-EMPTY: + // CHECK-NEXT: cpy.size[0] = 4; + // CHECK-NEXT: cpy.size[1] = 7; + CUDA_MEMCPY2D cpy; + cpy.dstMemoryType = CU_MEMORYTYPE_HOST; + cpy.dstHost = f_A; + cpy.dstPitch = 20; + cpy.dstY = 10; + cpy.dstXInBytes = 15; + + cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE; + cpy.srcDevice = f_D; + cpy.srcPitch = 20; + cpy.srcY = 10; + cpy.srcXInBytes = 15; + + cpy.WidthInBytes = 4; + cpy.Height = 7; + + // CHECK: syclcompat::experimental::memcpy(cpy); + cuMemcpy2D(&cpy); + // CHECK: syclcompat::experimental::memcpy_async(cpy, *stream); + cuMemcpy2DAsync(&cpy, stream); + + CUdeviceptr devicePtr; + + CUresult cu_err; + + CUdeviceptr cuDevPtr; + + CUdevice cudevice; + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: int advise = 0; + CUmem_advise advise = CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION; + + // CHECK: syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise); + cuMemAdvise(devicePtr, count, advise, cudevice); + + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise))); + cuCheckError(cuMemAdvise(devicePtr, count, advise, cudevice)); + + // CHECK: cu_err = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, advise)); + cu_err = cuMemAdvise(devicePtr, count, advise, cudevice); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0); + cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + cuCheckError(cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice)); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + cuCheckError(cuMemAdvise(devicePtr, count, (CUmem_advise)1, cudevice)); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + cuCheckError(cuMemAdvise(devicePtr, count, CUmem_advise(1), cudevice)); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0))); + cuCheckError(cuMemAdvise(devicePtr, count, static_cast(1), cudevice)); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: cu_err = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0)); + cu_err = cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: syclcompat::get_device(cudevice).default_queue()->mem_advise(devicePtr, count, 0); + cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, cudevice); + + // CHECK: /* + // CHECK-NEXT: DPCT1063:{{[0-9]+}}: Advice parameter is device-defined and was set to 0. You may need to adjust it. + // CHECK-NEXT: */ + // CHECK-NEXT: syclcompat::cpu_device().default_queue()->mem_advise(devicePtr, count, 0); + cuMemAdvise(devicePtr, count, CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, CU_DEVICE_CPU); + + + CUdeviceptr devPtr; + CUresult curesult; + // CHECK: stream->prefetch(devPtr, 100); + cuMemPrefetchAsync (devPtr, 100, cudevice, stream); + // CHECK: (*&stream)->prefetch(devPtr, 100); + cuMemPrefetchAsync (devPtr, 100, cudevice, *&stream); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, NULL); + // CHECK: syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100); + cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy); + // CHECK: curesult = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + curesult = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + cuCheckError(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault)); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + cuCheckError(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy)); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + cuCheckError(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread)); + + // CHECK: syclcompat::experimental::memcpy_parameter cpy2; + // CHECK: /* + // CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "CUarray" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK-NEXT: */ + // CHECK-NEXT: CUarray ca; + // CHECK-NEXT: cpy2.to.image = ca; + // CHECK-NEXT: cpy2.to.pitched.set_pitch(5); + // CHECK-NEXT: cpy2.to.pitched.set_y(4); + // CHECK-NEXT: cpy2.to.pos[1] = 3; + // CHECK-NEXT: cpy2.to.pos[2] = 2; + // CHECK-NEXT: cpy2.to.pos[0] = 1; + // CHECK: cpy2.from.pitched.set_data_ptr(f_A); + // CHECK-NEXT: cpy2.from.pitched.set_pitch(5); + // CHECK-NEXT: cpy2.from.pitched.set_y(4); + // CHECK-NEXT: cpy2.from.pos[1] = 3; + // CHECK-NEXT: cpy2.from.pos[2] = 2; + // CHECK-NEXT: cpy2.from.pos[0] = 1; + // CHECK-EMPTY: + // CHECK-NEXT: cpy2.size[0] = 3; + // CHECK-NEXT: cpy2.size[1] = 2; + // CHECK-NEXT: cpy2.size[2] = 1; + CUDA_MEMCPY3D cpy2; +#ifndef BUILD_TEST + CUarray ca; + cpy2.dstMemoryType = CU_MEMORYTYPE_ARRAY; + cpy2.dstArray = ca; + cpy2.dstPitch = 5; + cpy2.dstHeight = 4; + cpy2.dstY = 3; + cpy2.dstZ = 2; + cpy2.dstXInBytes = 1; + cpy2.dstLOD = 0; +#endif + + cpy2.srcMemoryType = CU_MEMORYTYPE_HOST; + cpy2.srcHost = f_A; + cpy2.srcPitch = 5; + cpy2.srcHeight = 4; + cpy2.srcY = 3; + cpy2.srcZ = 2; + cpy2.srcXInBytes = 1; + cpy2.srcLOD = 0; + + cpy2.WidthInBytes = 3; + cpy2.Height = 2; + cpy2.Depth = 1; + + // CHECK: syclcompat::experimental::memcpy(cpy2); + cuMemcpy3D(&cpy2); + + CUstream cs; + // CHECK: syclcompat::experimental::memcpy_async(cpy2, *cs); + cuMemcpy3DAsync(&cpy2, cs); + + float *h_A = (float *)malloc(100); + // CHECK:sycl::free(h_A, q_ct1); + cuMemFreeHost(h_A); + // CHECK:sycl::free(f_D, q_ct1); + cuMemFree(f_D); + + unsigned int flags; + int host; + + + // CHECK: flags = 0; + cuMemHostGetFlags(&flags, &host); + // CHECK: cuCheckError(SYCLCOMPAT_CHECK_ERROR(flags = 0)); + cuCheckError(cuMemHostGetFlags(&flags, &host)); + + // CHECK: /* + // CHECK-NEXT: DPCT1026:{{[0-9]+}}: The call to cuMemHostRegister was removed because SYCL currently does not support registering of existing host memory for use by device. Use USM to allocate memory for use by host and device. + // CHECK-NEXT: */ + cuMemHostRegister(h_A, count, flags); + // CHECK: /* + // CHECK-NEXT: DPCT1027:{{[0-9]+}}: The call to cuMemHostRegister was replaced with 0 because SYCL currently does not support registering of existing host memory for use by device. Use USM to allocate memory for use by host and device. + // CHECK-NEXT: */ + // CHECK-NEXT: cuCheckError(0); + cuCheckError(cuMemHostRegister(h_A, count, flags)); + + + // CHECK: /* + // CHECK-NEXT: DPCT1026:{{[0-9]+}}: The call to cuMemHostUnregister was removed because SYCL currently does not support registering of existing host memory for use by device. Use USM to allocate memory for use by host and device. + // CHECK-NEXT: */ + cuMemHostUnregister(h_A); + + // CHECK: /* + // CHECK-NEXT: DPCT1027:{{[0-9]+}}: The call to cuMemHostUnregister was replaced with 0 because SYCL currently does not support registering of existing host memory for use by device. Use USM to allocate memory for use by host and device. + // CHECK-NEXT: */ + // CHECK-NEXT:cuCheckError(0); + cuCheckError(cuMemHostUnregister(h_A)); + return 0; +} diff --git a/clang/test/dpct/usm-syclcompat.cu b/clang/test/dpct/usm-syclcompat.cu new file mode 100644 index 000000000000..22b4b0a7e562 --- /dev/null +++ b/clang/test/dpct/usm-syclcompat.cu @@ -0,0 +1,1089 @@ +// UNSUPPORTED: system-windows +// RUN: dpct --format-range=none --use-syclcompat --usm-level=restricted -out-root %T/usm-syclcompat %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only +// RUN: FileCheck --match-full-lines --input-file %T/usm-syclcompat/usm-syclcompat.dp.cpp %s +// RUN: %if build_lit %{icpx -c -fsycl -DBUILD_TEST %T/usm-syclcompat/usm-syclcompat.dp.cpp -o %T/usm-syclcompat/usm-syclcompat.dp.o %} + +// CHECK: #include +// CHECK-NEXT: #include +#include +#include +#include +#include +#include + +#define MY_SAFE_CALL(CALL) do { \ + int Error = CALL; \ +} while (0) + +__constant__ float constData[123 * 4]; + +int foo_b(int a){ + return 0; +} + +void foo() { + // CHECK: syclcompat::device_ext &dev_ct1 = syclcompat::get_current_device(); + // CHECK-NEXT: sycl::queue &q_ct1 = *dev_ct1.default_queue(); + size_t size = 1234567 * sizeof(float); + float *h_A = (float *)malloc(size); + float *d_A = NULL; + int errorCode; + + cudaPitchedPtr p_A; + cudaExtent e; + cudaMemcpy3DParms parms, *parms_pointer; + cudaStream_t stream; + + /// malloc + // CHECK: d_A = (float *)sycl::malloc_device(size, q_ct1); + cudaMalloc((void **)&d_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(d_A = (float *)sycl::malloc_device(size, q_ct1)); + errorCode = cudaMalloc((void **)&d_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(d_A = (float *)sycl::malloc_device(size, q_ct1))); + MY_SAFE_CALL(cudaMalloc((void **)&d_A, size)); + + // CHECK: d_A = (float *)sycl::malloc_device(sizeof(sycl::double2) + size, q_ct1); + // CHECK-NEXT: d_A = (float *)sycl::malloc_device(sizeof(sycl::uchar4) + size, q_ct1); + // CHECK-NEXT: d_A = (float *)sycl::malloc_device(sizeof(d_A[0]), q_ct1); + cudaMalloc((void **)&d_A, sizeof(double2) + size); + cudaMalloc((void **)&d_A, sizeof(uchar4) + size); + cudaMalloc((void **)&d_A, sizeof(d_A[0])); + + // CHECK: d_A = (float *)syclcompat::malloc(size, size, size); + cudaMallocPitch((void **)&d_A, &size, size, size); + // CHECK: p_A = syclcompat::malloc(e); + cudaMalloc3D(&p_A, e); + + // CHECK: h_A = (float *)sycl::malloc_host(size, q_ct1); + cudaHostAlloc((void **)&h_A, size, cudaHostAllocDefault); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1)); + errorCode = cudaHostAlloc((void **)&h_A, size, cudaHostAllocDefault); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1))); + MY_SAFE_CALL(cudaHostAlloc((void **)&h_A, size, cudaHostAllocDefault)); + + // CHECK: /* + // CHECK-NEXT: DPCT1048:{{[0-9]+}}: The original value cudaHostAllocDefault is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK-NEXT: */ + // CHECK-NEXT: h_A = (float *)sycl::malloc_host(sizeof(sycl::double2) - size, q_ct1); + cudaHostAlloc((void **)&h_A, sizeof(double2) - size, cudaHostAllocDefault); + // CHECK: /* + // CHECK-NEXT: DPCT1048:{{[0-9]+}}: The original value cudaHostAllocDefault is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK-NEXT: */ + // CHECK-NEXT: h_A = (float *)sycl::malloc_host(sizeof(sycl::uchar4) - size, q_ct1); + cudaHostAlloc((void **)&h_A, sizeof(uchar4) - size, cudaHostAllocDefault); + + void *h_B = h_A; + // CHECK: h_A = (float *)sycl::malloc_host(size, q_ct1); + cudaMallocHost((void **)&h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1)); + errorCode = cudaMallocHost((void **)&h_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1))); + MY_SAFE_CALL(cudaMallocHost((void **)&h_A, size)); + + // CHECK: h_A = (float *)sycl::malloc_host(size, q_ct1); + cuMemAllocHost((void **)&h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1)); + errorCode = cuMemAllocHost((void **)&h_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1))); + MY_SAFE_CALL(cuMemAllocHost((void **)&h_A, size)); + + // CHECK: h_A = (float *)sycl::malloc_host(sizeof(sycl::double2) * size, q_ct1); + // CHECK-NEXT: h_A = (float *)sycl::malloc_host(sizeof(sycl::uchar4) * size, q_ct1); + cudaMallocHost((void **)&h_A, sizeof(double2) * size); + cudaMallocHost((void **)&h_A, sizeof(uchar4) * size); + + // CHECK: h_A = (float *)sycl::malloc_host(size, q_ct1); + cudaMallocHost(&h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1)); + errorCode = cudaMallocHost(&h_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1))); + MY_SAFE_CALL(cudaMallocHost(&h_A, size)); + + // CHECK: h_A = (float *)sycl::malloc_host(sizeof(sycl::double2) / size, q_ct1); + // CHECK-NEXT: h_A = (float *)sycl::malloc_host(sizeof(sycl::uchar4) / size, q_ct1); + cudaMallocHost(&h_A, sizeof(double2) / size); + cudaMallocHost(&h_A, sizeof(uchar4) / size); + + float* buffer[2]; +#define SIZE_1 (128 * 1024 * 1024) + // CHECK: *buffer = sycl::malloc_host(SIZE_1, q_ct1); + // CHECK-NEXT: *(buffer + 1) = sycl::malloc_host(SIZE_1, q_ct1); + cudaMallocHost((void**)buffer, SIZE_1 * sizeof(float)); + cudaMallocHost((void**)(buffer + 1), SIZE_1 * sizeof(float)); +#undef SIZE_1 + + // CHECK: d_A = (float *)sycl::malloc_shared(size, q_ct1); + cudaMallocManaged((void **)&d_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(d_A = (float *)sycl::malloc_shared(size, q_ct1)); + errorCode = cudaMallocManaged((void **)&d_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(d_A = (float *)sycl::malloc_shared(size, q_ct1))); + MY_SAFE_CALL(cudaMallocManaged((void **)&d_A, size)); + + // CHECK: d_A = (float *)sycl::malloc_shared(sizeof(sycl::double2) + size + sizeof(sycl::uchar4), q_ct1); + // CHECK-NEXT: d_A = (float *)sycl::malloc_shared(sizeof(sycl::double2) * size * sizeof(sycl::uchar4), q_ct1); + cudaMallocManaged((void **)&d_A, sizeof(double2) + size + sizeof(uchar4)); + cudaMallocManaged((void **)&d_A, sizeof(double2) * size * sizeof(uchar4)); + + CUdeviceptr* D_ptr; + // CHECK: *D_ptr = (syclcompat::device_ptr)sycl::malloc_shared(size, q_ct1); + cuMemAllocManaged(D_ptr, size, CU_MEM_ATTACH_HOST); + + /// memcpy + + // CHECK: q_ct1.memcpy(d_A, h_A, size); + cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + errorCode = cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + MY_SAFE_CALL(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice)); +#define MACRO_A(x) size +#define MACRO_A2(x) MACRO_A(x) +#define MACRO_B size +#define MACOR_C(x) cudaMemcpyDeviceToHost +#define MY_SAFE_CALL2(x) MY_SAFE_CALL(x) + //CHECK: MY_SAFE_CALL2(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + MY_SAFE_CALL2(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice)); + //CHECK: MY_SAFE_CALL2(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, MACRO_B))); + MY_SAFE_CALL2(cudaMemcpy(d_A, h_A, MACRO_B, cudaMemcpyDeviceToHost)); + //CHECK: MY_SAFE_CALL2(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, MACRO_A2(1)))); + MY_SAFE_CALL2(cudaMemcpy(d_A, h_A, MACRO_A2(1), MACOR_C(1))); + //CHECK: MY_SAFE_CALL2(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, foo_b(1)))); + MY_SAFE_CALL2(cudaMemcpy(d_A, h_A, foo_b(1), MACOR_C(1))); + +#define SIZE 100 + // CHECK: q_ct1.memcpy( d_A, h_A, sizeof(double)*SIZE*SIZE ).wait(); + cudaMemcpy( d_A, h_A, sizeof(double)*SIZE*SIZE, cudaMemcpyDeviceToHost ); + + /// memcpy async + + // CHECK: q_ct1.memcpy(d_A, h_A, size); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice)); + + // CHECK: q_ct1.memcpy(d_A, h_A, size); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, 0)); + + // CHECK: stream->memcpy(d_A, h_A, size); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memcpy(d_A, h_A, size)); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memcpy(d_A, h_A, size))); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, stream)); + + // CHECK: syclcompat::memcpy(d_A, size, h_A, size, size, size); + cudaMemcpy2D(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice); + // CHECK: syclcompat::memcpy(h_A, size, d_A, size, size, size); + cudaMemcpy2D(h_A, size, d_A, size, size, size, cudaMemcpyDeviceToHost); + + // CHECK: syclcompat::experimental::memcpy(parms); + cudaMemcpy3D(&parms); + // CHECK: syclcompat::experimental::memcpy(*parms_pointer); + cudaMemcpy3D(parms_pointer); + + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice); + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, 0); + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size, *stream); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, stream); + + // CHECK: syclcompat::memcpy_async(h_A, size, d_A, size, size, size); + cudaMemcpy2DAsync(h_A, size, d_A, size, size, size, cudaMemcpyDeviceToHost); + // CHECK: syclcompat::memcpy_async(h_A, size, d_A, size, size, size); + cudaMemcpy2DAsync(h_A, size, d_A, size, size, size, cudaMemcpyDeviceToHost, 0); + // CHECK: syclcompat::memcpy_async(h_A, size, d_A, size, size, size, *stream); + cudaMemcpy2DAsync(h_A, size, d_A, size, size, size, cudaMemcpyDeviceToHost, stream); + + // CHECK: syclcompat::experimental::memcpy_async(parms); + cudaMemcpy3DAsync(&parms); + // CHECK: syclcompat::experimental::memcpy_async(parms); + cudaMemcpy3DAsync(&parms, 0); + // CHECK: syclcompat::experimental::memcpy_async(parms, *stream); + cudaMemcpy3DAsync(&parms, stream); + /// memcpy from symbol + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, constData, size, 1); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, "constData", size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbol(h_A, constData, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbol(h_A, constData, size, 1)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, "constData", size, 1, cudaMemcpyDeviceToHost); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size).wait())); + MY_SAFE_CALL(cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost)); + + /// memcpy from symbol async + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbolAsync(h_A, "constData", size, 1, cudaMemcpyDeviceToHost); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0)); + + // CHECK: stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size); + cudaMemcpyFromSymbolAsync(h_A, "constData", size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream)); + + /// memcpy to symbol + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol(constData, h_A, size, 1); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol("constData", h_A, size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbol(constData, h_A, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbol(constData, h_A, size, 1)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol("constData", h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size).wait())); + MY_SAFE_CALL(cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice)); + + /// memcpy to symbol async + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size, 2, cudaMemcpyHostToDevice, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0)); + + // CHECK: stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream)); + + /// memset + + // CHECK: q_ct1.memset(d_A, 23, size).wait(); + cudaMemset(d_A, 23, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size).wait()); + errorCode = cudaMemset(d_A, 23, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size).wait())); + MY_SAFE_CALL(cudaMemset(d_A, 23, size)); + + /// memset async + + // CHECK: q_ct1.memset(d_A, 23, size); + cudaMemsetAsync(d_A, 23, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size)); + errorCode = cudaMemsetAsync(d_A, 23, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size))); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size)); + + // CHECK: q_ct1.memset(d_A, 23, size); + cudaMemsetAsync(d_A, 23, size, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size)); + errorCode = cudaMemsetAsync(d_A, 23, size, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size))); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size, 0)); + + // CHECK: stream->memset(d_A, 23, size); + cudaMemsetAsync(d_A, 23, size, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memset(d_A, 23, size)); + errorCode = cudaMemsetAsync(d_A, 23, size, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memset(d_A, 23, size))); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size, stream)); + + // CHECK: syclcompat::memset(d_A, size, 0xf, size, size); + cudaMemset2D(d_A, size, 0xf, size, size); + // CHECK: syclcompat::memset(p_A, 0xf, e); + cudaMemset3D(p_A, 0xf, e); + + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size); + cudaMemset2DAsync(d_A, size, 0xf, size, size); + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size); + cudaMemset2DAsync(d_A, size, 0xf, size, size, 0); + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size, *stream); + cudaMemset2DAsync(d_A, size, 0xf, size, size, stream); + + // CHECK: syclcompat::memset_async(p_A, 0xf, e); + cudaMemset3DAsync(p_A, 0xf, e); + // CHECK: syclcompat::memset_async(p_A, 0xf, e); + cudaMemset3DAsync(p_A, 0xf, e, 0); + // CHECK: syclcompat::memset_async(p_A, 0xf, e, *stream); + cudaMemset3DAsync(p_A, 0xf, e, stream); + + // CHECK: sycl::free(h_A, q_ct1); + cudaFreeHost(h_A); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(sycl::free(h_A, q_ct1)); + errorCode = cudaFreeHost(h_A); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(sycl::free(h_A, q_ct1))); + MY_SAFE_CALL(cudaFreeHost(h_A)); + + // CHECK: *(void **)&d_A = (float *)h_A; + cudaHostGetDevicePointer((void **)&d_A, h_A, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(d_A = (float *)h_A); + errorCode = cudaHostGetDevicePointer(&d_A, h_A, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(d_A = (float *)h_A)); + MY_SAFE_CALL(cudaHostGetDevicePointer(&d_A, h_A, 0)); + + // CHECK: *D_ptr = (syclcompat::device_ptr)h_A; + cuMemHostGetDevicePointer(D_ptr, h_A, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(*D_ptr = (syclcompat::device_ptr)h_A); + errorCode = cuMemHostGetDevicePointer(D_ptr, h_A, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(*D_ptr = (syclcompat::device_ptr)h_A)); + MY_SAFE_CALL(cuMemHostGetDevicePointer(D_ptr, h_A, 0)); + + cudaHostRegister(h_A, size, 0); + // CHECK: errorCode = 0; + errorCode = cudaHostRegister(h_A, size, 0); + // CHECK: MY_SAFE_CALL(0); + MY_SAFE_CALL(cudaHostRegister(h_A, size, 0)); + + cudaHostUnregister(h_A); + // CHECK: errorCode = 0; + errorCode = cudaHostUnregister(h_A); + // CHECK: MY_SAFE_CALL(0); + MY_SAFE_CALL(cudaHostUnregister(h_A)); +} + + +template +int foo2() { + // CHECK: syclcompat::device_ext &dev_ct1 = syclcompat::get_current_device(); + // CHECK-NEXT: sycl::queue &q_ct1 = *dev_ct1.default_queue(); + size_t size = 1234567 * sizeof(float); + float *h_A = (float *)malloc(size); + float *d_A = NULL; + int errorCode; + + cudaStream_t stream; + /// memcpy from symbol + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, constData, size, 1); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, "constData", size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbol(h_A, constData, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbol(h_A, constData, size, 1)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbol(h_A, constData, size, 1, cudaMemcpyDeviceToHost)); + + // CHECK: q_ct1.memcpy(h_A, constData.get_ptr(), size); + cudaMemcpyFromSymbol(h_A, constData, size); + // CHECK: q_ct1.memcpy(h_A, constData.get_ptr(), size); + cudaMemcpyFromSymbol(h_A, "constData", size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, constData.get_ptr(), size)); + errorCode = cudaMemcpyFromSymbol(h_A, constData, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, constData.get_ptr(), size).wait())); + MY_SAFE_CALL(cudaMemcpyFromSymbol(h_A, constData, size)); + + /// memcpy from symbol async + + // CHECK: q_ct1.memcpy(h_A, constData.get_ptr(), size); + cudaMemcpyFromSymbolAsync(h_A, constData, size); + // CHECK: q_ct1.memcpy(h_A, constData.get_ptr(), size); + cudaMemcpyFromSymbolAsync(h_A, "constData", size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, constData.get_ptr(), size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, constData.get_ptr(), size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 1); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbolAsync(h_A, "constData", size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 1)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 1, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 1, cudaMemcpyDeviceToHost)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 2, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 2, cudaMemcpyDeviceToHost, 0)); + + // CHECK: stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size); + cudaMemcpyFromSymbolAsync(h_A, "constData", size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size)); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memcpy(h_A, (char *)(constData.get_ptr(*stream)) + 3, size))); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, stream)); + + /// memcpy to symbol + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol(constData, h_A, size, 1); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol("constData", h_A, size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbol(constData, h_A, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbol(constData, h_A, size, 1)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbol(constData, h_A, size, 1, cudaMemcpyHostToDevice)); + + // CHECK: q_ct1.memcpy(constData.get_ptr(), h_A, size); + cudaMemcpyToSymbol(constData, h_A, size); + // CHECK: q_ct1.memcpy(constData.get_ptr(), h_A, size); + cudaMemcpyToSymbol("constData", h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), h_A, size)); + errorCode = cudaMemcpyToSymbol(constData, h_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), h_A, size).wait())); + MY_SAFE_CALL(cudaMemcpyToSymbol(constData, h_A, size)); + + /// memcpy to symbol async + // CHECK: q_ct1.memcpy(constData.get_ptr(), h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size); + // CHECK: q_ct1.memcpy(constData.get_ptr(), h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size, 1); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice)); + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 2, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 2, cudaMemcpyHostToDevice, 0)); + + // CHECK: stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size); + cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size); + cudaMemcpyToSymbolAsync("constData", h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size)); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(stream->memcpy((char *)(constData.get_ptr(*stream)) + 3, h_A, size))); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 3, cudaMemcpyHostToDevice, stream)); +} + +template int foo2(); +template int foo2(); + +void foo3() { + size_t size = 1234567 * sizeof(float); + float *h_A = (float *)malloc(size); + float *d_A = NULL; + int errorCode; + cudaPitchedPtr p_A; + cudaExtent e; + cudaMemcpy3DParms parms; + int *data; + size_t width, height, depth, pitch, woffset, hoffset; + int deviceID = 0; + + // CHECK: auto s1 = std::make_shared((syclcompat::queue_ptr)&q_ct1); + // CHECK: auto s2 = std::make_shared(&q_ct1); + // CHECK: auto s3 = std::make_shared(&q_ct1); + auto s1 = std::make_shared((cudaStream_t)cudaStreamDefault); + auto s2 = std::make_shared(cudaStreamLegacy); + auto s3 = std::make_shared(cudaStreamPerThread); + + // CHECK: q_ct1.memcpy(d_A, h_A, size); + // CHECK: q_ct1.memcpy(d_A, h_A, size); + // CHECK: q_ct1.memcpy(d_A, h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(d_A, h_A, size))); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamDefault); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamLegacy); + cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamPerThread); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamDefault); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamLegacy); + errorCode = cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpyAsync(d_A, h_A, size, cudaMemcpyHostToDevice, cudaStreamPerThread)); + + + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + // CHECK: q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy((char *)(constData.get_ptr()) + 1, h_A, size))); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamDefault); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamLegacy); + cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamPerThread); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamDefault); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamLegacy); + errorCode = cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpyToSymbolAsync(constData, h_A, size, 1, cudaMemcpyHostToDevice, cudaStreamPerThread)); + + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size); + // CHECK: q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(h_A, (char *)(constData.get_ptr()) + 3, size))); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamDefault); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamLegacy); + cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamPerThread); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamDefault); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamLegacy); + errorCode = cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpyFromSymbolAsync(h_A, constData, size, 3, cudaMemcpyDeviceToHost, cudaStreamPerThread)); + + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size); + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size); + // CHECK: syclcompat::memcpy_async(d_A, size, h_A, size, size, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memcpy_async(d_A, size, h_A, size, size, size))); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamDefault); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamLegacy); + cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamPerThread); + errorCode = cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamDefault); + errorCode = cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamLegacy); + errorCode = cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpy2DAsync(d_A, size, h_A, size, size, size, cudaMemcpyHostToDevice, cudaStreamPerThread)); + + // CHECK: syclcompat::experimental::memcpy_async(parms); + // CHECK: syclcompat::experimental::memcpy_async(parms); + // CHECK: syclcompat::experimental::memcpy_async(parms); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::experimental::memcpy_async(parms))); + cudaMemcpy3DAsync(&parms, cudaStreamDefault); + cudaMemcpy3DAsync(&parms, cudaStreamLegacy); + cudaMemcpy3DAsync(&parms, cudaStreamPerThread); + errorCode = cudaMemcpy3DAsync(&parms, cudaStreamDefault); + errorCode = cudaMemcpy3DAsync(&parms, cudaStreamLegacy); + errorCode = cudaMemcpy3DAsync(&parms, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpy3DAsync(&parms, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpy3DAsync(&parms, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpy3DAsync(&parms, cudaStreamPerThread)); + +#ifndef BUILD_TEST + cudaArray_t a1; + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault); + cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy); + cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread); + errorCode = cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault); + errorCode = cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy); + errorCode = cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpy2DFromArrayAsync(data, pitch, a1, woffset, hoffset, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread)); + + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpy2DToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault); + cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy); + cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread); + errorCode = cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault); + errorCode = cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy); + errorCode = cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpy2DToArrayAsync(a1, woffset, hoffset, data, pitch, width, height, cudaMemcpyDeviceToHost, cudaStreamPerThread)); + + + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyToArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamDefault); + cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamLegacy); + cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamPerThread); + errorCode = cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamDefault); + errorCode = cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamLegacy); + errorCode = cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpyToArrayAsync(a1, woffset, hoffset, data, width, cudaMemcpyDeviceToHost, cudaStreamPerThread)); + + + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + // CHECK: DPCT1131:{{[0-9]+}}: The migration of "cudaMemcpyFromArrayAsync" is not supported with SYCLcompat currently, please adjust the code manually. + cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamDefault); + cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamLegacy); + cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamPerThread); + errorCode = cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamDefault); + errorCode = cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamLegacy); + errorCode = cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemcpyFromArrayAsync(data, a1, woffset, hoffset, width, cudaMemcpyDeviceToHost, cudaStreamPerThread)); +#endif + + + // CHECK: q_ct1.memset(d_A, 23, size); + // CHECK: q_ct1.memset(d_A, 23, size); + // CHECK: q_ct1.memset(d_A, 23, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(q_ct1.memset(d_A, 23, size))); + cudaMemsetAsync(d_A, 23, size, cudaStreamDefault); + cudaMemsetAsync(d_A, 23, size, cudaStreamLegacy); + cudaMemsetAsync(d_A, 23, size, cudaStreamPerThread); + errorCode = cudaMemsetAsync(d_A, 23, size, cudaStreamDefault); + errorCode = cudaMemsetAsync(d_A, 23, size, cudaStreamLegacy); + errorCode = cudaMemsetAsync(d_A, 23, size, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemsetAsync(d_A, 23, size, cudaStreamPerThread)); + + + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size); + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size); + // CHECK: syclcompat::memset_async(d_A, size, 0xf, size, size); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(d_A, size, 0xf, size, size))); + cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamDefault); + cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamLegacy); + cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamPerThread); + errorCode = cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamDefault); + errorCode = cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamLegacy); + errorCode = cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemset2DAsync(d_A, size, 0xf, size, size, cudaStreamPerThread)); + + + // CHECK: syclcompat::memset_async(p_A, 0xf, e); + // CHECK: syclcompat::memset_async(p_A, 0xf, e); + // CHECK: syclcompat::memset_async(p_A, 0xf, e); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::memset_async(p_A, 0xf, e))); + cudaMemset3DAsync(p_A, 0xf, e, cudaStreamDefault); + cudaMemset3DAsync(p_A, 0xf, e, cudaStreamLegacy); + cudaMemset3DAsync(p_A, 0xf, e, cudaStreamPerThread); + errorCode = cudaMemset3DAsync(p_A, 0xf, e, cudaStreamDefault); + errorCode = cudaMemset3DAsync(p_A, 0xf, e, cudaStreamLegacy); + errorCode = cudaMemset3DAsync(p_A, 0xf, e, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemset3DAsync(p_A, 0xf, e, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemset3DAsync(p_A, 0xf, e, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemset3DAsync(p_A, 0xf, e, cudaStreamPerThread)); + + + // CHECK: syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100); + // CHECK: syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100); + // CHECK: syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(deviceID).default_queue()->prefetch(d_A,100))); + cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamDefault); + cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamLegacy); + cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamPerThread); + errorCode = cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamDefault); + errorCode = cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamLegacy); + errorCode = cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamPerThread); + MY_SAFE_CALL(cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamDefault)); + MY_SAFE_CALL(cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamLegacy)); + MY_SAFE_CALL(cudaMemPrefetchAsync (d_A, 100, deviceID, cudaStreamPerThread)); + // CHECK: int cudevice = 0; + CUdevice cudevice = 0; + // CHECK: syclcompat::device_ptr devPtr; + CUdeviceptr devPtr; + // CHECK: syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100); + // CHECK: syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100); + // CHECK: syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100)); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(syclcompat::get_device(cudevice).default_queue()->prefetch(devPtr, 100))); + cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault); + cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy); + cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread); + errorCode = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault); + errorCode = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy); + errorCode = cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread); + MY_SAFE_CALL(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamDefault)); + MY_SAFE_CALL(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamLegacy)); + MY_SAFE_CALL(cuMemPrefetchAsync (devPtr, 100, cudevice, cudaStreamPerThread)); +} + +/// cuda driver memory api +void foo4(){ + size_t size = 1234567 * sizeof(float); + float *h_A = (float *)malloc(size); + + int errorCode; + // CHECK: /* + // CHECK: DPCT1048:{{[0-9]+}}: The original value CU_MEMHOSTALLOC_PORTABLE is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK: */ + // CHECK: h_A = (float *)sycl::malloc_host(size, q_ct1); + cuMemHostAlloc((void **)&h_A, size, CU_MEMHOSTALLOC_PORTABLE); + // CHECK: /* + // CHECK: DPCT1048:{{[0-9]+}}: The original value CU_MEMHOSTALLOC_PORTABLE is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK: */ + // CHECK: errorCode = SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1)); + errorCode = cuMemHostAlloc((void **)&h_A, size, CU_MEMHOSTALLOC_PORTABLE); + // CHECK: /* + // CHECK: DPCT1048:{{[0-9]+}}: The original value CU_MEMHOSTALLOC_PORTABLE is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK: */ + // CHECK: MY_SAFE_CALL(SYCLCOMPAT_CHECK_ERROR(h_A = (float *)sycl::malloc_host(size, q_ct1))); + MY_SAFE_CALL(cuMemHostAlloc((void **)&h_A, size, CU_MEMHOSTALLOC_PORTABLE)); + // CHECK: /* + // CHECK: DPCT1048:{{[0-9]+}}: The original value CU_MEMHOSTALLOC_PORTABLE is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK: */ + // CHECK: h_A = (float *)sycl::malloc_host(sizeof(sycl::double2) - size, q_ct1); + cuMemHostAlloc((void **)&h_A, sizeof(double2) - size, CU_MEMHOSTALLOC_PORTABLE); + // CHECK: /* + // CHECK: DPCT1048:{{[0-9]+}}: The original value CU_MEMHOSTALLOC_PORTABLE is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code. + // CHECK: */ + // CHECK: h_A = (float *)sycl::malloc_host(sizeof(sycl::uchar4) - size, q_ct1); + cuMemHostAlloc((void **)&h_A, sizeof(uchar4) - size, CU_MEMHOSTALLOC_PORTABLE); +} + +#define MY_SAFE_CALL3(CALL) { \ + cudaError Error = CALL; \ + if (Error != cudaSuccess) { \ + printf("%s\n", cudaGetErrorString(Error)); \ + exit(Error); \ + } \ +} + +void foo5(float* a) { +// CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); +// CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); +// CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); +} + + +void foo6(float* a) { + // CHECK: printf("%d\n", SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + // CHECK: printf("%d\n", SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + printf("%d\n", cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + printf("%d\n", cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); +} + +__global__ void test_kernel() {} + +int foo7() { + unsigned int mem_size; + unsigned int *h_out_data; + unsigned int *h_data; + unsigned int *d_out_data; + unsigned int *d_in_data_1; + unsigned int *d_in_data_2; + int num_data; + + for (unsigned int i = 0; i < num_data; i++) + h_data[i] = i; + // CHECK: q_ct1.memcpy(d_in_data_1, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_1, h_data, mem_size, cudaMemcpyHostToDevice); + + for (unsigned int i = 0; i < num_data; i++) + h_data[i] = num_data - 1 - i; + // CHECK: q_ct1.memcpy(d_in_data_2, h_data, mem_size); + cudaMemcpy(d_in_data_2, h_data, mem_size, cudaMemcpyHostToDevice); + + test_kernel<<<3, 3>>>(); + cudaDeviceSynchronize(); + // CHECK: q_ct1.memcpy(h_out_data, d_out_data, mem_size).wait(); + cudaMemcpy(h_out_data, d_out_data, mem_size, cudaMemcpyDeviceToHost); + + return 0; +} + +int foo8() { + unsigned int mem_size; + unsigned int *h_data; + unsigned int *d_in_data_1; + unsigned int *d_in_data_2; + + // CHECK: q_ct1.memcpy(d_in_data_1, h_data, mem_size); + cudaMemcpy(d_in_data_1, h_data, mem_size, cudaMemcpyHostToDevice); + // CHECK: q_ct1.memcpy(d_in_data_2, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_2, h_data, mem_size, cudaMemcpyHostToDevice); + return 0; +} + +int foo9() { + unsigned int mem_size; + unsigned int *h_data; + unsigned int *d_in_data_1; + unsigned int *d_in_data_2; + unsigned int *test = d_in_data_1; + + // CHECK: q_ct1.memcpy(d_in_data_1, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_1, h_data, mem_size, cudaMemcpyHostToDevice); + test; + // CHECK: q_ct1.memcpy(d_in_data_2, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_2, h_data, mem_size, cudaMemcpyHostToDevice); + return 0; +} + +int foo10(unsigned int *test) { + unsigned int mem_size; + unsigned int *data_d, *data_h; + + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size).wait(); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + test; + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size).wait(); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + return 0; +} + +unsigned int *global_test; + +int foo11() { + unsigned int mem_size; + unsigned int *data_d, *data_h; + + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size).wait(); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + global_test; + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size).wait(); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + return 0; +} + +struct TEST { + unsigned int t; + void call() { + unsigned int mem_size; + unsigned int *data_d, *data_h; + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + // CHECK: q_ct1.memcpy(data_d, data_h, mem_size).wait(); + cudaMemcpy(data_d, data_h, mem_size, cudaMemcpyHostToDevice); + } +}; + +int foo12() { + TEST test; + return 0; +} + +void foo13(float* a, bool flag) { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + while(flag) { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + if(flag) { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpyToSymbol(constData, a, 16)); + } else { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(constData.get_ptr(), a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpyToSymbol(constData, a, 16)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + } + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + } + + do { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, constData.get_ptr(), 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpyFromSymbol(a, constData, 16)); + } while(flag); + + for(;;) { + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, constData.get_ptr(), 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpyFromSymbol(a, constData, 16)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + } + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16))); + // CHECK: MY_SAFE_CALL3(SYCLCOMPAT_CHECK_ERROR(q_ct1.memcpy(a, a, 16).wait())); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); + MY_SAFE_CALL3(cudaMemcpy(a, a, 16, cudaMemcpyDeviceToHost)); +} + +void foo14() { + int h_selected_num; + int *d_selected_num; + int *h_out; + int *d_out; + //CHECK:q_ct1.memcpy((void *)&h_selected_num, (void *)d_selected_num, sizeof(int)).wait(); + //CHECK-NEXT:q_ct1.memcpy((void *)h_out, (void *)d_out, h_selected_num * sizeof(int)).wait(); + cudaMemcpy((void *)&h_selected_num, (void *)d_selected_num, sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy((void *)h_out, (void *)d_out, h_selected_num * sizeof(int), cudaMemcpyDeviceToHost); +} + +struct TEST_STR { + int a[10]; +}; + +void foo15() { + std::vector buf; + for (int i = 0; i < 32; i++) { + //CHECK: buf[i] = (volatile TEST_STR *)sycl::malloc_host(sizeof(TEST_STR), syclcompat::get_default_queue()); + cudaMallocHost(&buf[i], sizeof(TEST_STR)); + } +} + +void foo16() { + std::vector buf; + for (int i = 0; i < 32; i++) { + //CHECK: (buf.front()) = (volatile TEST_STR *)sycl::malloc_host(sizeof(TEST_STR), syclcompat::get_default_queue()); + cudaMallocHost(&buf.front(), sizeof(TEST_STR)); + } +} + +int foo17() { + unsigned int mem_size; + unsigned int *h_data; + unsigned int *d_in_data_1; + unsigned int *d_in_data_2; + + // CHECK: q_ct1.memcpy(d_in_data_1, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_1, h_data, mem_size, cudaMemcpyHostToDevice); + h_data[0] = 1; + // CHECK: q_ct1.memcpy(d_in_data_2, h_data, mem_size).wait(); + cudaMemcpy(d_in_data_2, h_data, mem_size, cudaMemcpyHostToDevice); + return 0; +}