diff --git a/ALVR.sln b/ALVR.sln
index c7de1bf9..cbfddbb9 100644
--- a/ALVR.sln
+++ b/ALVR.sln
@@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ALVR", "ALVR\ALVR.csproj",
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "alvr_server", "alvr_server\driver_virtual_display.vcxproj", "{6FB51D67-327E-4A18-BC74-7AA6AAB4C827}"
EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CUDA", "CUDA\CUDA.vcxproj", "{12336478-A663-42F8-8D52-8D093CD99992}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -51,6 +53,14 @@ Global
{6FB51D67-327E-4A18-BC74-7AA6AAB4C827}.Release|Win32.Build.0 = Release|Win32
{6FB51D67-327E-4A18-BC74-7AA6AAB4C827}.Release|x64.ActiveCfg = Release|x64
{6FB51D67-327E-4A18-BC74-7AA6AAB4C827}.Release|x64.Build.0 = Release|x64
+ {12336478-A663-42F8-8D52-8D093CD99992}.Debug|Any CPU.ActiveCfg = Debug|x64
+ {12336478-A663-42F8-8D52-8D093CD99992}.Debug|Win32.ActiveCfg = Debug|x64
+ {12336478-A663-42F8-8D52-8D093CD99992}.Debug|x64.ActiveCfg = Debug|x64
+ {12336478-A663-42F8-8D52-8D093CD99992}.Debug|x64.Build.0 = Debug|x64
+ {12336478-A663-42F8-8D52-8D093CD99992}.Release|Any CPU.ActiveCfg = Release|x64
+ {12336478-A663-42F8-8D52-8D093CD99992}.Release|Win32.ActiveCfg = Release|x64
+ {12336478-A663-42F8-8D52-8D093CD99992}.Release|x64.ActiveCfg = Release|x64
+ {12336478-A663-42F8-8D52-8D093CD99992}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/CUDA/CUDA.vcxproj b/CUDA/CUDA.vcxproj
new file mode 100644
index 00000000..7a167368
--- /dev/null
+++ b/CUDA/CUDA.vcxproj
@@ -0,0 +1,90 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+ {12336478-A663-42F8-8D52-8D093CD99992}
+ CUDA
+ 10.0.17134.0
+
+
+
+ StaticLibrary
+ true
+ MultiByte
+ v141
+
+
+ StaticLibrary
+ false
+ true
+ MultiByte
+ v141
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+ Level3
+ Disabled
+ WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ Console
+ cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ 64
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ true
+ true
+ Console
+ cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ 64
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/CUDA/RGBToNV12.cu b/CUDA/RGBToNV12.cu
new file mode 100644
index 00000000..8fe1091f
--- /dev/null
+++ b/CUDA/RGBToNV12.cu
@@ -0,0 +1,88 @@
+#include
+
+#include "RGBToNV12.h"
+
+__device__ float rgb2y(uchar4 c) {
+ return 0.257f * c.x + 0.504f * c.y + 0.098f * c.z + 16.0f;
+}
+
+__device__ float rgb2u(uchar4 c) {
+ return -0.148f * c.x - 0.291f * c.y + 0.439f * c.z + 128.0f;
+}
+
+__device__ float rgb2v(uchar4 c) {
+ return 0.439f * c.x - 0.368f * c.y - 0.071f * c.z + 128.0f;
+}
+
+texture texRef;
+
+__global__ void RGBA2NV12_kernel(uint8_t *dstImage, size_t destPitch,
+ uint32_t width, uint32_t height)
+{
+ // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
+ int32_t x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
+ int32_t y = blockIdx.y * (blockDim.y << 1) + (threadIdx.y << 1);
+
+ int x1 = x + 1;
+ int y1 = y + 1;
+
+ if (x1 >= width)
+ return; //x = width - 1;
+
+ if (y1 >= height)
+ return; // y = height - 1;
+
+ uchar4 c00 = tex2D(texRef, x, y);
+ uchar4 c01 = tex2D(texRef, x1, y);
+ uchar4 c10 = tex2D(texRef, x, y1);
+ uchar4 c11 = tex2D(texRef, x1, y1);
+
+ uint8_t y00 = (uint8_t)(rgb2y(c00) + 0.5f);
+ uint8_t y01 = (uint8_t)(rgb2y(c01) + 0.5f);
+ uint8_t y10 = (uint8_t)(rgb2y(c10) + 0.5f);
+ uint8_t y11 = (uint8_t)(rgb2y(c11) + 0.5f);
+
+ uint8_t u = (uint8_t)((rgb2u(c00) + rgb2u(c01) + rgb2u(c10) + rgb2u(c11)) * 0.25f + 0.5f);
+ uint8_t v = (uint8_t)((rgb2v(c00) + rgb2v(c01) + rgb2v(c10) + rgb2v(c11)) * 0.25f + 0.5f);
+
+ dstImage[destPitch * y + x] = y00;
+ dstImage[destPitch * y + x1] = y01;
+ dstImage[destPitch * y1 + x] = y10;
+ dstImage[destPitch * y1 + x1] = y11;
+
+ uint32_t chromaOffset = destPitch * height;
+ int32_t x_chroma = x;
+ int32_t y_chroma = y >> 1;
+
+ dstImage[chromaOffset + destPitch * y_chroma + x_chroma] = u;
+ dstImage[chromaOffset + destPitch * y_chroma + x_chroma + 1] = v;
+}
+
+extern "C"
+cudaError_t RGBA2NV12(cudaArray *srcImage,
+ uint8_t *dstImage, size_t destPitch,
+ uint32_t width, uint32_t height)
+{
+ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
+
+ // Set texture parameters
+ texRef.addressMode[0] = cudaAddressModeWrap;
+ texRef.addressMode[1] = cudaAddressModeWrap;
+ texRef.filterMode = cudaFilterModePoint;
+ texRef.normalized = false;
+
+ cudaError_t cudaStatus = cudaBindTextureToArray(texRef, srcImage, channelDesc);
+ if (cudaStatus != cudaSuccess) {
+ return cudaStatus;
+ }
+
+ dim3 block(32, 16, 1);
+ dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (2 * block.y - 1)) / (2 * block.y), 1);
+
+ RGBA2NV12_kernel<<>>(dstImage, destPitch, width, height);
+
+ cudaThreadSynchronize();
+
+ cudaStatus = cudaGetLastError();
+ return cudaStatus;
+}
\ No newline at end of file
diff --git a/CUDA/RGBToNV12.h b/CUDA/RGBToNV12.h
new file mode 100644
index 00000000..eb6b0c84
--- /dev/null
+++ b/CUDA/RGBToNV12.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include
+
+extern "C"
+cudaError_t RGBA2NV12(cudaArray *srcImage,
+ uint8_t *dstImage, size_t destPitch,
+ uint32_t width, uint32_t height);
\ No newline at end of file
diff --git a/alvr_server/CudaConverter.h b/alvr_server/CudaConverter.h
index ae228bdd..33bb4840 100644
--- a/alvr_server/CudaConverter.h
+++ b/alvr_server/CudaConverter.h
@@ -8,6 +8,8 @@
#include
#include
+#include
+
#include "Logger.h"
using Microsoft::WRL::ComPtr;
@@ -39,25 +41,32 @@ class CudaConverter {
return m_cuContext;
}
- void Convert(const ComPtr &texture) {
+ void Convert(const ComPtr &texture, const NvEncInputFrame* encoderInputFrame) {
+ cudaError cuStatus;
+
RegisterTexture(texture);
+ cuStatus = cudaGraphicsMapResources(1, &m_cudaResource, 0);
+ if (cuStatus != cudaSuccess) {
+ throw MakeException("cudaGraphicsMapResources failed.");
+ }
+
cudaArray *cuArray;
- cudaError cuStatus = cudaGraphicsSubResourceGetMappedArray(&cuArray, m_cudaResource, 0, 0);
+ cuStatus = cudaGraphicsSubResourceGetMappedArray(&cuArray, m_cudaResource, 0, 0);
if (cuStatus != cudaSuccess) {
throw MakeException("cudaGraphicsSubResourceGetMappedArray failed.");
}
- // then we want to copy cudaLinearMemory to the D3D texture, via its mapped form : cudaArray
- cuStatus = cudaMemcpy2DFromArray(
- m_cudaLinearMemory, m_pitch, // dst array
- cuArray, // src
- 0, 0,
- m_width, m_height, // extent
- cudaMemcpyDeviceToDevice); // kind
+ cuStatus = RGBA2NV12(cuArray, (uint8_t *)encoderInputFrame->inputPtr, encoderInputFrame->pitch, m_width, m_height);
+
if (cuStatus != cudaSuccess) {
throw MakeException("cudaMemcpy2DFromArray failed.");
}
+
+ cudaGraphicsUnmapResources(1, &m_cudaResource, 0);
+ if (cuStatus != cudaSuccess) {
+ throw MakeException("cudaGraphicsUnmapResources failed.");
+ }
}
private:
diff --git a/alvr_server/NvCodecUtils.h b/alvr_server/NvCodecUtils.h
index 291ed1fa..d668ae3a 100644
--- a/alvr_server/NvCodecUtils.h
+++ b/alvr_server/NvCodecUtils.h
@@ -24,7 +24,7 @@ inline bool check(CUresult e, int iLine, const char *szFile) {
if (e != CUDA_SUCCESS) {
const char *szErrName = NULL;
cuGetErrorName(e, &szErrName);
- LOG(FATAL) << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile;
+ Log("CUDA driver API error %s at line %d in file %s", szErrName, iLine, szFile);
return false;
}
return true;
diff --git a/alvr_server/alvr_server.cpp b/alvr_server/alvr_server.cpp
index ca90713d..e4b9b9e9 100644
--- a/alvr_server/alvr_server.cpp
+++ b/alvr_server/alvr_server.cpp
@@ -32,7 +32,8 @@
#include "packet_types.h"
#include "resource.h"
#include "Tracking.h"
-#include "CudaConverter.h"
+#include "CudaConverter.h"
+#include "RGBToNV12.h"
HINSTANCE g_hInstance;
@@ -194,7 +195,7 @@ namespace
{
try {
Log("ConvertRGBToNV12 start");
- m_Converter->Convert(pTexture);
+ m_Converter->Convert(pTexture, encoderInputFrame);
Log("ConvertRGBToNV12 end");
}
catch (NVENCException e) {
@@ -1374,6 +1375,7 @@ class CServerDriver_DisplayRedirect : public vr::IServerTrackedDeviceProvider
std::shared_ptr m_mutex;
};
+extern "C" int main_test();
vr::EVRInitError CServerDriver_DisplayRedirect::Init( vr::IVRDriverContext *pContext )
{
VR_INIT_SERVER_DRIVER_CONTEXT( pContext );
diff --git a/alvr_server/driver_virtual_display.vcxproj b/alvr_server/driver_virtual_display.vcxproj
index 1fc083ad..3de5eb20 100644
--- a/alvr_server/driver_virtual_display.vcxproj
+++ b/alvr_server/driver_virtual_display.vcxproj
@@ -78,12 +78,12 @@
Level3
Disabled
true
- ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include
+ ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\
_WINDLL;NOMINMAX;%(PreprocessorDefinitions)
true
- ws2_32.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies)
+ ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\Win32\cudart_static.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies)
@@ -98,12 +98,12 @@
Level3
Disabled
true
- ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include
+ ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\
_WINDLL;NOMINMAX;_WINSOCKAPI_;%(PreprocessorDefinitions)
true
- ws2_32.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies)
+ ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\x64\cudart_static.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies)
@@ -120,14 +120,14 @@
true
true
true
- ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include
+ ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\
_WINDLL;NOMINMAX;%(PreprocessorDefinitions)
true
true
true
- ws2_32.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies)
+ ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\Win32\cudart_static.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies)
@@ -144,14 +144,14 @@
true
true
true
- ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include
+ ../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\
_WINDLL;NOMINMAX;_WINSOCKAPI_;%(PreprocessorDefinitions)
true
true
true
- ws2_32.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies)
+ ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\x64\cudart_static.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies)
@@ -174,6 +174,9 @@
+
+ {12336478-a663-42f8-8d52-8d093cd99992}
+
{10868996-d864-4e88-8bcb-ba530af64712}