From f1781446d161880917bc0d159b62be746e80594c Mon Sep 17 00:00:00 2001 From: Bowen Bao Date: Wed, 24 Oct 2018 17:34:01 -0700 Subject: [PATCH] Support CUDA 10 * Move to support CUDA 10, cudnn 7.3, cub 1.8. * Fixed a bug related to "pointer to pin pointer is disallowed" #3063, which is exposed in newer version vctools. * Added workaround for a potential vs2017 15.9 bug with cntk Debug version. --- CNTK.Cpp.props | 14 ++++---- Documentation/current_iteration.md | 13 +++++++ Makefile | 6 ++-- Source/ActionsLib/NDLNetworkBuilder.h | 4 ++- .../Extensibility/EvalWrapper/EvalWrapper.cpp | 10 +++--- Source/Math/GPUMatrixCUDAKernels.cuh | 1 - Source/Math/cudalib.cpp | 2 ++ Source/Math/half.hpp | 5 ++- Tools/devInstall/Windows/DevInstall.ps1 | 6 ++-- .../devInstall/Windows/helper/Operations.ps1 | 34 +++++++++---------- .../Windows/helper/PreRequisites.ps1 | 4 +-- Tools/docker/CNTK-GPU-Image/Dockerfile | 16 ++++----- bindings/java/Swig/post-build.cmd | 2 +- configure | 10 +++--- 14 files changed, 72 insertions(+), 55 deletions(-) diff --git a/CNTK.Cpp.props b/CNTK.Cpp.props index 09dcda75ce03..94e48c21e91b 100644 --- a/CNTK.Cpp.props +++ b/CNTK.Cpp.props @@ -3,7 +3,7 @@ - 9.0 + 10.0 %ProgramW6432%\NVIDIA Corporation\NVSMI\nvml.dll c:\local\nvsmi9\NVSMI\nvml.dll @@ -110,10 +110,10 @@ libprotobufd.lib - - $(CUDA_PATH_V9_0) - cudart64_90.dll - cublas64_90.dll;cusparse64_90.dll;curand64_90.dll;$(CudaRuntimeDll) + + $(CUDA_PATH_V10_0) + cudart64_100.dll + cublas64_100.dll;cusparse64_100.dll;curand64_100.dll;$(CudaRuntimeDll) %(PreprocessorDefinitions);HAS_MPI=1 - %(PreprocessorDefinitions);CUDA_NO_HALF;__CUDA_NO_HALF_OPERATORS__ + %(PreprocessorDefinitions);CUDA_NO_HALF;__CUDA_NO_HALF_OPERATORS__ diff --git a/Documentation/current_iteration.md b/Documentation/current_iteration.md index b6f8c854e8b6..1e69523b6f70 100644 --- a/Documentation/current_iteration.md +++ b/Documentation/current_iteration.md @@ -3,3 +3,16 @@ ## Highlights of this release * Moved to CUDA 10 for both Windows 
and Linux. * Support advance RNN loop in ONNX export. + +## CUDA support for CUDA 10 + +CNTK now supports CUDA 10. This requires updating the build environment to Visual Studio 2017 v15.9 on Windows. + +To set up the build and runtime environment on Windows: +* Install [Visual Studio 2017](https://www.visualstudio.com/downloads/). Note: going forward for CUDA 10 and beyond, it is no longer required to install and run with the specific VC Tools version 14.11. +* Install [Nvidia CUDA 10](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64) +* From PowerShell, run: + [DevInstall.ps1](./Tools/devInstall/Windows/DevInstall.ps1) +* Start Visual Studio 2017 and open [CNTK.sln](./CNTK.sln). + +To set up the build and runtime environment on Linux using Docker, please build the Ubuntu 16.04 Docker image using the Dockerfiles [here](./Tools/docker). For other Linux systems, please refer to the Dockerfiles to set up the dependent libraries for CNTK. \ No newline at end of file diff --git a/Makefile b/Makefile index cc25d6550ebd..8910814bea3e 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ # CUDA_PATH= Path to CUDA # If not specified, GPU will not be enabled # CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists -# defaults to /usr/local/cub-1.4.1 +# defaults to /usr/local/cub-1.8.0 # CUDNN_PATH= path to NVIDIA cuDNN installation so $(CUDNN_PATH)/cuda/include/cudnn.h exists # CuDNN version needs to be 5.0 or higher. 
# KALDI_PATH= Path to Kaldi @@ -144,8 +144,8 @@ ifdef CUDA_PATH endif ifndef CUB_PATH - $(info defaulting CUB_PATH to /usr/local/cub-1.4.1) - CUB_PATH=/usr/local/cub-1.4.1 + $(info defaulting CUB_PATH to /usr/local/cub-1.8.0) + CUB_PATH=/usr/local/cub-1.8.0 endif DEVICE = gpu diff --git a/Source/ActionsLib/NDLNetworkBuilder.h b/Source/ActionsLib/NDLNetworkBuilder.h index ae082da19ab6..1f40ef1fd456 100644 --- a/Source/ActionsLib/NDLNetworkBuilder.h +++ b/Source/ActionsLib/NDLNetworkBuilder.h @@ -500,7 +500,9 @@ class NDLBuilder } } - Init(executionEngine, networkConfig, newConfig, dumpFileName, deviceId); + // workaround for VS2017 15.9.2 Debug Win32 Access Violation error. + wstring networkConfigWstring = networkConfig; + Init(executionEngine, networkConfigWstring, newConfig, dumpFileName, deviceId); } virtual ~NDLBuilder() diff --git a/Source/Extensibility/EvalWrapper/EvalWrapper.cpp b/Source/Extensibility/EvalWrapper/EvalWrapper.cpp index c7cc256480e2..8a8b544ae91b 100644 --- a/Source/Extensibility/EvalWrapper/EvalWrapper.cpp +++ b/Source/Extensibility/EvalWrapper/EvalWrapper.cpp @@ -231,7 +231,7 @@ public ref class IEvaluateModelManaged : IDisposable pin_ptr key = PtrToStringChars(item.Key); shared_ptr> ptr = CopyList(item.Value); sharedInputVectors.push_back(ptr); - stdInputs.insert(MapEntry(key, ptr.get())); + stdInputs.insert(MapEntry(static_cast(key), ptr.get())); } for each (auto item in outputs) @@ -239,7 +239,7 @@ public ref class IEvaluateModelManaged : IDisposable pin_ptr key = PtrToStringChars(item.Key); shared_ptr> ptr = CopyList(item.Value); sharedOutputVectors.push_back(ptr); - stdOutputs.insert(MapEntry(key, ptr.get())); + stdOutputs.insert(MapEntry(static_cast(key), ptr.get())); } try @@ -382,13 +382,13 @@ public ref class IEvaluateModelManaged : IDisposable std::vector>> sharedOutputVectors; pin_ptr inputKey = PtrToStringChars(inputNodeName); shared_ptr> f2(featureVector); - stdInputs.insert(MapEntry(inputKey, f2.get())); + 
stdInputs.insert(MapEntry(static_cast(inputKey), f2.get())); pin_ptr key = PtrToStringChars(outputKey); // Do we have to initialize the output nodes? shared_ptr> ptr(new std::vector(outputSize)); sharedOutputVectors.push_back(ptr); - stdOutputs.insert(MapEntry(key, ptr.get())); + stdOutputs.insert(MapEntry(static_cast(key), ptr.get())); try { m_eval->Evaluate(stdInputs, stdOutputs); @@ -517,7 +517,7 @@ public ref class IEvaluateModelManaged : IDisposable pin_ptr key = PtrToStringChars(item.Key); shared_ptr> ptr = CopyList(item.Value); sharedOutputVectors.push_back(ptr); - stdOutputs.insert(MapEntry(key, ptr.get())); + stdOutputs.insert(MapEntry(static_cast(key), ptr.get())); } try diff --git a/Source/Math/GPUMatrixCUDAKernels.cuh b/Source/Math/GPUMatrixCUDAKernels.cuh index 95596948bbe6..59edf814b59c 100755 --- a/Source/Math/GPUMatrixCUDAKernels.cuh +++ b/Source/Math/GPUMatrixCUDAKernels.cuh @@ -15,7 +15,6 @@ #include "CommonMatrix.h" #include "GPUMatrix.h" #include "TensorOps.h" // for exp_() etc. 
-#include "device_functions.h" #include #include #include diff --git a/Source/Math/cudalib.cpp b/Source/Math/cudalib.cpp index ba7f2eaacb9f..3d999ae5a8ab 100644 --- a/Source/Math/cudalib.cpp +++ b/Source/Math/cudalib.cpp @@ -22,6 +22,8 @@ #pragma comment(lib, "cudart.lib") // link CUDA runtime #pragma comment(lib, "cublas.lib") +#pragma warning(disable : 4505) // 'function' : unreferenced local function has been removed + namespace msra { namespace cuda { static int devicesallocated = -1; // -1 means not initialized diff --git a/Source/Math/half.hpp b/Source/Math/half.hpp index bcab31eaf0db..d70de5116697 100644 --- a/Source/Math/half.hpp +++ b/Source/Math/half.hpp @@ -11,7 +11,10 @@ #include "../CNTKv2LibraryDll/API/HalfConverter.hpp" #if !defined(CPUONLY) && __has_include("cuda_fp16.h") -#include <cuda_fp16.h> // ASSUME CUDA9 + +#pragma warning(disable : 4505) // 'function' : unreferenced local function has been removed + +#include <cuda_fp16.h> // ASSUME CUDA10 #else class alignas(2) __half { diff --git a/Tools/devInstall/Windows/DevInstall.ps1 b/Tools/devInstall/Windows/DevInstall.ps1 index cc8c30bbad1a..b3b13cfff558 100644 --- a/Tools/devInstall/Windows/DevInstall.ps1 +++ b/Tools/devInstall/Windows/DevInstall.ps1 @@ -136,9 +136,9 @@ Function main $operation += OpScanProgram $operation += OpCheckVS2017 - $operation += OpCheckCuda9 - $operation += OpNVidiaCudnn7090 -cache $localCache -targetFolder $localDir - $operation += OpNvidiaCub174 -cache $localCache -targetFolder $localDir + $operation += OpCheckCuda10 + $operation += OpNVidiaCudnn73100 -cache $localCache -targetFolder $localDir + $operation += OpNvidiaCub180 -cache $localCache -targetFolder $localDir $operation += OpCMake362 -cache $localCache $operation += OpMSMPI70 -cache $localCache diff --git a/Tools/devInstall/Windows/helper/Operations.ps1 b/Tools/devInstall/Windows/helper/Operations.ps1 index 0ea04ef7afb4..6876630c9d75 100644 --- a/Tools/devInstall/Windows/helper/Operations.ps1 +++ 
b/Tools/devInstall/Windows/helper/Operations.ps1 @@ -149,19 +149,19 @@ function OpMSMPI70SDK( } ) } -function OpNvidiaCub174( +function OpNvidiaCub180( [parameter(Mandatory=$true)][string] $cache, [parameter(Mandatory=$true)][string] $targetFolder) { - $prodName = "NVidia CUB 1.7.4" - $prodFile = "cub-1.7.4.zip" - $prodSubDir = "cub-1.7.4" + $prodName = "NVidia CUB 1.8.0" + $prodFile = "cub-1.8.0.zip" + $prodSubDir = "cub-1.8.0" $targetPath = join-path $targetFolder $prodSubDir $envVar = "CUB_PATH"; $envValue = $targetPath - $downloadSource = "https://github.com/NVlabs/cub/archive/1.7.4.zip" + $downloadSource = "https://github.com/NVlabs/cub/archive/1.8.0.zip" - @( @{ShortName = "CUB174"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName"; + @( @{ShortName = "CUB180"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName"; Verification = @( @{Function = "VerifyDirectory"; Path = "$targetPath" }, @{Function = "VerifyEnvironmentAndData"; EnvVar = $envVar; Content = $envValue } ); Download = @( @{Function = "Download"; Method = "WebRequest"; Source = $downloadSource; Destination = "$cache\$prodFile" } ); @@ -170,20 +170,20 @@ function OpNvidiaCub174( } ) } -function OpNVidiaCudnn7090( +function OpNVidiaCudnn73100( [parameter(Mandatory=$true)][string] $cache, [parameter(Mandatory=$true)][string] $targetFolder) { - $prodName = "NVidia CUDNN 7.0.5 for CUDA 9.0" - $cudnnWin = "cudnn-9.0-windows10-x64-v7.zip" + $prodName = "NVidia CUDNN 7.3.1 for CUDA 10.0" + $cudnnWin = "cudnn-10.0-windows10-x64-v7.3.1.20.zip" - $prodSubDir = "cudnn-9.0-v7.0.5" + $prodSubDir = "cudnn-10.0-v7.3.1" $targetPath = join-path $targetFolder $prodSubDir $envVar = "CUDNN_PATH" $envValue = join-path $targetPath "cuda" - $downloadSource = "http://developer.download.nvidia.com/compute/redist/cudnn/v7.0.5" + $downloadSource = "http://developer.download.nvidia.com/compute/redist/cudnn/v7.3.1" - @( @{ShortName = "CUDNN7090"; 
VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName"; + @( @{ShortName = "CUDNN73100"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName"; Verification = @( @{Function = "VerifyDirectory"; Path = $targetPath }, @{Function = "VerifyDirectory"; Path = $envValue }, @{Function = "VerifyEnvironmentAndData"; EnvVar = $envVar; Content = $envValue } ); @@ -308,13 +308,13 @@ function OpCheckVS2017 } ) } -function OpCheckCuda9 +function OpCheckCuda10 { - $programPath = join-path $env:ProgramFiles "NVIDIA GPU Computing Toolkit\CUDA\v9.0" - @( @{Name = "Verify Installation of NVidia Cuda 9.0"; ShortName = "PRECUDA90"; VerifyInfo = "Checking for NVidia Cuda 9.0"; + $programPath = join-path $env:ProgramFiles "NVIDIA GPU Computing Toolkit\CUDA\v10.0" + @( @{Name = "Verify Installation of NVidia Cuda 10.0"; ShortName = "PRECUDA100"; VerifyInfo = "Checking for NVidia Cuda 10.0"; Verification = @( @{Function = "VerifyDirectory"; Path = $programPath }, - @{Function = "VerifyEnvironmentAndData"; EnvVar = "CUDA_PATH_V9_0"; Content = $programPath } ); - PreReq = @( @{Function = "PrereqInfoCuda9" } ); + @{Function = "VerifyEnvironmentAndData"; EnvVar = "CUDA_PATH_V10_0"; Content = $programPath } ); + PreReq = @( @{Function = "PrereqInfoCuda10" } ); Action = @( @{Function = "StopInstallation" } ) } ) } diff --git a/Tools/devInstall/Windows/helper/PreRequisites.ps1 b/Tools/devInstall/Windows/helper/PreRequisites.ps1 index d7db05517234..ed6f50bc915a 100644 --- a/Tools/devInstall/Windows/helper/PreRequisites.ps1 +++ b/Tools/devInstall/Windows/helper/PreRequisites.ps1 @@ -44,14 +44,14 @@ for more details. " } -function PrereqInfoCuda9( +function PrereqInfoCuda10( [Parameter(Mandatory = $true)][hashtable] $table ) { FunctionIntro $table Write-Warning " -Installation of NVidia CUDA 9.0 is a pre-requisite before installation can continue. 
+Installation of NVidia CUDA 10.0 is a pre-requisite before installation can continue. Please check https://docs.microsoft.com/en-us/cognitive-toolkit/Setup-CNTK-on-Windows for more details. diff --git a/Tools/docker/CNTK-GPU-Image/Dockerfile b/Tools/docker/CNTK-GPU-Image/Dockerfile index ee2aa0856ca1..8ed653c9718a 100644 --- a/Tools/docker/CNTK-GPU-Image/Dockerfile +++ b/Tools/docker/CNTK-GPU-Image/Dockerfile @@ -1,16 +1,14 @@ -# Tag: nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 -# Created: 2017-11-21T06:34:14.675603521Z -# Label: com.nvidia.build.id: 41212533 -# Label: com.nvidia.build.ref: e0edb5359ecb7bd3d86f0c9bfa18c2260b741ebb -# Label: com.nvidia.cuda.version: 9.0.176 -# Label: com.nvidia.cudnn.version: 7.0.4.31 -# Label: com.nvidia.nccl.version: 2.1.2 +# Tag: nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04 +# Created: 2018-10-22T21:14:30.605789926Z +# Label: com.nvidia.cuda.version: 10.0. +# Label: com.nvidia.cudnn.version: 7.3.1.20 +# Label: com.nvidia.nccl.version: 2.3.5 # # To build, run from the parent with the command line: # docker build -t -f CNTK-GPU-Image/Dockerfile . 
# Ubuntu 16.04.5 -FROM nvidia/cuda@sha256:33add9c50ab76b8f3a92187c0418ed600d5bea27690fda40711122fdc28ce2f4 +FROM nvidia/cuda@sha256:362e4e25aa46a18dfa834360140e91b61cdb0a3a2796c8e09dadb268b9de3f6b RUN apt-get update && apt-get install -y --no-install-recommends \ autotools-dev \ @@ -85,7 +83,7 @@ RUN LIBZIP_VERSION=1.1.2 && \ ENV LD_LIBRARY_PATH /usr/local/lib:$LD_LIBRARY_PATH -RUN wget -q -O - https://github.com/NVlabs/cub/archive/1.7.4.tar.gz | tar -C /usr/local -xzf - +RUN wget -q -O - https://github.com/NVlabs/cub/archive/1.8.0.tar.gz | tar -C /usr/local -xzf - RUN OPENCV_VERSION=3.1.0 && \ wget -q -O - https://github.com/Itseez/opencv/archive/${OPENCV_VERSION}.tar.gz | tar -xzf - && \ diff --git a/bindings/java/Swig/post-build.cmd b/bindings/java/Swig/post-build.cmd index ca04c865aae5..24c5fccfb79c 100644 --- a/bindings/java/Swig/post-build.cmd +++ b/bindings/java/Swig/post-build.cmd @@ -25,7 +25,7 @@ echo Building java. if not exist "%project_dir%com\microsoft\CNTK\lib\windows" mkdir "%project_dir%com\microsoft\CNTK\lib\windows" if "%is_gpu%" == "true" ( - for %%x in (cublas64_90.dll cudart64_90.dll cudnn64_7.dll curand64_90.dll cusparse64_90.dll nvml.dll) do ( + for %%x in (cublas64_100.dll cudart64_100.dll cudnn64_7.dll curand64_100.dll cusparse64_100.dll nvml.dll) do ( copy "%output_dir%/%%x" ".\com\microsoft\CNTK\lib\windows\%%x" echo %%x>> .\com\microsoft\CNTK\lib\windows\NATIVE_MANIFEST ) diff --git a/configure b/configure index b9a8efc38576..097b7550fd63 100755 --- a/configure +++ b/configure @@ -127,13 +127,13 @@ default_openblas="" default_boost="boost-1.60.0" -default_cudas="cuda-9.0" +default_cudas="cuda-10.0" default_nccls="nccl" default_kaldis="kaldi-trunk kaldi-c024e8aa" default_gdk_includes="include/nvidia/gdk cuda/include" default_gdk_nvml_libs="src/gdk/nvml/lib cuda/lib64/stubs" -default_cubs="cub-1.7.4" -default_cudnns="cudnn-7.0" +default_cubs="cub-1.8.0" +default_cudnns="cudnn-7.3" default_opencvs="opencv-3.1.0 opencv-3.0.0" 
default_protobuf="protobuf-3.1.0" default_libzips="libzip-1.1.2" @@ -688,7 +688,7 @@ do then echo "Cannot find NVIDIA CUB directory." echo "Please specify a value for --with-cub" - echo "NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.4.1.zip, extract the archive to /usr/local" + echo "NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.8.0.zip, extract the archive to /usr/local" exit 1 fi else @@ -1082,7 +1082,7 @@ then if test x$cub_path = x ; then echo Cannot locate NVIDIA CUB directory echo GPU will be disabled - echo NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.4.1.zip, extract the archive to /usr/local + echo NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.8.0.zip, extract the archive to /usr/local enable_cuda=no else echo Found CUB at $cub_path