From 754085133cc301216109de1c2f0615acaa101c9b Mon Sep 17 00:00:00 2001 From: polygraphene Date: Sat, 16 Jun 2018 03:42:11 +0900 Subject: [PATCH] Convert RGBA texture to NV12 CUDA buffer for Windows 7. --- ALVR.sln | 10 +++ CUDA/CUDA.vcxproj | 90 ++++++++++++++++++++++ CUDA/RGBToNV12.cu | 88 +++++++++++++++++++++ CUDA/RGBToNV12.h | 8 ++ alvr_server/CudaConverter.h | 27 ++++--- alvr_server/NvCodecUtils.h | 2 +- alvr_server/alvr_server.cpp | 6 +- alvr_server/driver_virtual_display.vcxproj | 19 +++-- 8 files changed, 230 insertions(+), 20 deletions(-) create mode 100644 CUDA/CUDA.vcxproj create mode 100644 CUDA/RGBToNV12.cu create mode 100644 CUDA/RGBToNV12.h diff --git a/ALVR.sln b/ALVR.sln index c7de1bf9..cbfddbb9 100644 --- a/ALVR.sln +++ b/ALVR.sln @@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ALVR", "ALVR\ALVR.csproj", EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "alvr_server", "alvr_server\driver_virtual_display.vcxproj", "{6FB51D67-327E-4A18-BC74-7AA6AAB4C827}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CUDA", "CUDA\CUDA.vcxproj", "{12336478-A663-42F8-8D52-8D093CD99992}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -51,6 +53,14 @@ Global {6FB51D67-327E-4A18-BC74-7AA6AAB4C827}.Release|Win32.Build.0 = Release|Win32 {6FB51D67-327E-4A18-BC74-7AA6AAB4C827}.Release|x64.ActiveCfg = Release|x64 {6FB51D67-327E-4A18-BC74-7AA6AAB4C827}.Release|x64.Build.0 = Release|x64 + {12336478-A663-42F8-8D52-8D093CD99992}.Debug|Any CPU.ActiveCfg = Debug|x64 + {12336478-A663-42F8-8D52-8D093CD99992}.Debug|Win32.ActiveCfg = Debug|x64 + {12336478-A663-42F8-8D52-8D093CD99992}.Debug|x64.ActiveCfg = Debug|x64 + {12336478-A663-42F8-8D52-8D093CD99992}.Debug|x64.Build.0 = Debug|x64 + {12336478-A663-42F8-8D52-8D093CD99992}.Release|Any CPU.ActiveCfg = Release|x64 + {12336478-A663-42F8-8D52-8D093CD99992}.Release|Win32.ActiveCfg = Release|x64 + {12336478-A663-42F8-8D52-8D093CD99992}.Release|x64.ActiveCfg = Release|x64 + {12336478-A663-42F8-8D52-8D093CD99992}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/CUDA/CUDA.vcxproj b/CUDA/CUDA.vcxproj new file mode 100644 index 00000000..7a167368 --- /dev/null +++ b/CUDA/CUDA.vcxproj @@ -0,0 +1,90 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {12336478-A663-42F8-8D52-8D093CD99992} + CUDA + 10.0.17134.0 + + + + StaticLibrary + true + MultiByte + v141 + + + StaticLibrary + false + true + MultiByte + v141 + + + + + + + + + + + + + + true + + + + Level3 + Disabled + WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + Console + cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + 64 + + + + + Level3 + MaxSpeed + true + true + WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + 64 + + + + + + + + + + + + + \ No newline at end of file diff --git a/CUDA/RGBToNV12.cu b/CUDA/RGBToNV12.cu new file mode 100644 index 00000000..8fe1091f --- /dev/null +++ b/CUDA/RGBToNV12.cu @@ -0,0 +1,88 @@ +#include + +#include "RGBToNV12.h" + +__device__ float rgb2y(uchar4 c) { + return 0.257f * c.x + 0.504f * c.y + 0.098f * c.z + 16.0f; +} + +__device__ float rgb2u(uchar4 c) { + return -0.148f * c.x - 0.291f * c.y + 0.439f * c.z + 128.0f; +} + +__device__ float rgb2v(uchar4 c) { + return 0.439f * c.x - 0.368f * c.y - 0.071f * c.z + 128.0f; +} + +texture texRef; + +__global__ void RGBA2NV12_kernel(uint8_t *dstImage, size_t destPitch, + uint32_t width, uint32_t height) +{ + // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread + int32_t x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); + int32_t y = blockIdx.y * (blockDim.y << 1) + (threadIdx.y << 1); + + int x1 = x + 1; + int y1 = y + 1; + + if (x1 >= width) + return; //x = width - 1; + + if (y1 >= height) + return; // y = height - 1; + + uchar4 c00 = tex2D(texRef, x, y); + uchar4 c01 = tex2D(texRef, x1, y); + uchar4 c10 = tex2D(texRef, x, y1); + uchar4 c11 = tex2D(texRef, x1, y1); + + uint8_t y00 = (uint8_t)(rgb2y(c00) + 0.5f); + uint8_t y01 = (uint8_t)(rgb2y(c01) + 0.5f); + uint8_t y10 = (uint8_t)(rgb2y(c10) + 0.5f); + uint8_t y11 = (uint8_t)(rgb2y(c11) + 0.5f); + + uint8_t u = (uint8_t)((rgb2u(c00) + rgb2u(c01) + rgb2u(c10) + rgb2u(c11)) * 0.25f + 0.5f); + uint8_t v = (uint8_t)((rgb2v(c00) + rgb2v(c01) + rgb2v(c10) + rgb2v(c11)) * 0.25f + 0.5f); + + dstImage[destPitch * y + x] = y00; + dstImage[destPitch * y + x1] = y01; + dstImage[destPitch * y1 + x] = y10; + dstImage[destPitch * y1 + x1] = y11; + + uint32_t chromaOffset = destPitch * height; + int32_t x_chroma = x; + int32_t y_chroma = y >> 1; + + dstImage[chromaOffset + destPitch * y_chroma + x_chroma] = u; + dstImage[chromaOffset + destPitch * y_chroma + x_chroma + 1] = v; +} + +extern "C" +cudaError_t RGBA2NV12(cudaArray *srcImage, + uint8_t *dstImage, size_t destPitch, + uint32_t width, uint32_t height) +{ + cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned); + + // Set texture parameters + texRef.addressMode[0] = cudaAddressModeWrap; + texRef.addressMode[1] = cudaAddressModeWrap; + texRef.filterMode = cudaFilterModePoint; + texRef.normalized = false; + + cudaError_t cudaStatus = cudaBindTextureToArray(texRef, srcImage, channelDesc); + if (cudaStatus != cudaSuccess) { + return cudaStatus; + } + + dim3 block(32, 16, 1); + dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (2 * block.y - 1)) / (2 * block.y), 1); + + RGBA2NV12_kernel<<>>(dstImage, destPitch, width, height); + + cudaThreadSynchronize(); + + cudaStatus = cudaGetLastError(); + return cudaStatus; +} \ No newline at end of file diff --git a/CUDA/RGBToNV12.h b/CUDA/RGBToNV12.h new file mode 100644 index 00000000..eb6b0c84 --- /dev/null +++ b/CUDA/RGBToNV12.h @@ -0,0 +1,8 @@ +#pragma once + +#include + +extern "C" +cudaError_t RGBA2NV12(cudaArray *srcImage, + uint8_t *dstImage, size_t destPitch, + uint32_t width, uint32_t height); \ No newline at end of file diff --git a/alvr_server/CudaConverter.h b/alvr_server/CudaConverter.h index ae228bdd..33bb4840 100644 --- a/alvr_server/CudaConverter.h +++ b/alvr_server/CudaConverter.h @@ -8,6 +8,8 @@ #include #include +#include + #include "Logger.h" using Microsoft::WRL::ComPtr; @@ -39,25 +41,32 @@ class CudaConverter { return m_cuContext; } - void Convert(const ComPtr &texture) { + void Convert(const ComPtr &texture, const NvEncInputFrame* encoderInputFrame) { + cudaError cuStatus; + RegisterTexture(texture); + cuStatus = cudaGraphicsMapResources(1, &m_cudaResource, 0); + if (cuStatus != cudaSuccess) { + throw MakeException("cudaGraphicsMapResources failed."); + } + cudaArray *cuArray; - cudaError cuStatus = cudaGraphicsSubResourceGetMappedArray(&cuArray, m_cudaResource, 0, 0); + cuStatus = cudaGraphicsSubResourceGetMappedArray(&cuArray, m_cudaResource, 0, 0); if (cuStatus != cudaSuccess) { throw MakeException("cudaGraphicsSubResourceGetMappedArray failed."); } - // then we want to copy cudaLinearMemory to the D3D texture, via its mapped form : cudaArray - cuStatus = cudaMemcpy2DFromArray( - m_cudaLinearMemory, m_pitch, // dst array - cuArray, // src - 0, 0, - m_width, m_height, // extent - cudaMemcpyDeviceToDevice); // kind + cuStatus = RGBA2NV12(cuArray, (uint8_t *)encoderInputFrame->inputPtr, encoderInputFrame->pitch, m_width, m_height); + if (cuStatus != cudaSuccess) { throw MakeException("cudaMemcpy2DFromArray failed."); } + + cudaGraphicsUnmapResources(1, &m_cudaResource, 0); + if (cuStatus != cudaSuccess) { + throw MakeException("cudaGraphicsUnmapResources failed."); + } } private: diff --git a/alvr_server/NvCodecUtils.h b/alvr_server/NvCodecUtils.h index 291ed1fa..d668ae3a 100644 --- a/alvr_server/NvCodecUtils.h +++ b/alvr_server/NvCodecUtils.h @@ -24,7 +24,7 @@ inline bool check(CUresult e, int iLine, const char *szFile) { if (e != CUDA_SUCCESS) { const char *szErrName = NULL; cuGetErrorName(e, &szErrName); - LOG(FATAL) << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile; + Log("CUDA driver API error %s at line %d in file %s", szErrName, iLine, szFile); return false; } return true; diff --git a/alvr_server/alvr_server.cpp b/alvr_server/alvr_server.cpp index ca90713d..e4b9b9e9 100644 --- a/alvr_server/alvr_server.cpp +++ b/alvr_server/alvr_server.cpp @@ -32,7 +32,8 @@ #include "packet_types.h" #include "resource.h" #include "Tracking.h" -#include "CudaConverter.h" +#include "CudaConverter.h" +#include "RGBToNV12.h" HINSTANCE g_hInstance; @@ -194,7 +195,7 @@ namespace { try { Log("ConvertRGBToNV12 start"); - m_Converter->Convert(pTexture); + m_Converter->Convert(pTexture, encoderInputFrame); Log("ConvertRGBToNV12 end"); } catch (NVENCException e) { @@ -1374,6 +1375,7 @@ class CServerDriver_DisplayRedirect : public vr::IServerTrackedDeviceProvider std::shared_ptr m_mutex; }; +extern "C" int main_test(); vr::EVRInitError CServerDriver_DisplayRedirect::Init( vr::IVRDriverContext *pContext ) { VR_INIT_SERVER_DRIVER_CONTEXT( pContext ); diff --git a/alvr_server/driver_virtual_display.vcxproj b/alvr_server/driver_virtual_display.vcxproj index 1fc083ad..3de5eb20 100644 --- a/alvr_server/driver_virtual_display.vcxproj +++ b/alvr_server/driver_virtual_display.vcxproj @@ -78,12 +78,12 @@ Level3 Disabled true - ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include + ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\ _WINDLL;NOMINMAX;%(PreprocessorDefinitions) true - ws2_32.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies) + ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\Win32\cudart_static.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies) @@ -98,12 +98,12 @@ Level3 Disabled true - ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include + ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\ _WINDLL;NOMINMAX;_WINSOCKAPI_;%(PreprocessorDefinitions) true - ws2_32.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies) + ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\x64\cudart_static.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies) @@ -120,14 +120,14 @@ true true true - ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include + ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\ _WINDLL;NOMINMAX;%(PreprocessorDefinitions) true true true - ws2_32.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies) + ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\Win32\cudart_static.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies) @@ -144,14 +144,14 @@ true true true - ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include + ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\ _WINDLL;NOMINMAX;_WINSOCKAPI_;%(PreprocessorDefinitions) true true true - ws2_32.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies) + ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\x64\cudart_static.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies) @@ -174,6 +174,9 @@ + + {12336478-a663-42f8-8d52-8d093cd99992} + {10868996-d864-4e88-8bcb-ba530af64712}