Skip to content
This repository has been archived by the owner on Nov 8, 2021. It is now read-only.

Commit

Permalink
Convert RGBA texture to NV12 CUDA buffer for Windows 7.
Browse files Browse the repository at this point in the history
  • Loading branch information
polygraphene committed Jun 15, 2018
1 parent c3173f8 commit 7540851
Show file tree
Hide file tree
Showing 8 changed files with 230 additions and 20 deletions.
10 changes: 10 additions & 0 deletions ALVR.sln
Expand Up @@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ALVR", "ALVR\ALVR.csproj",
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "alvr_server", "alvr_server\driver_virtual_display.vcxproj", "{6FB51D67-327E-4A18-BC74-7AA6AAB4C827}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CUDA", "CUDA\CUDA.vcxproj", "{12336478-A663-42F8-8D52-8D093CD99992}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -51,6 +53,14 @@ Global
{6FB51D67-327E-4A18-BC74-7AA6AAB4C827}.Release|Win32.Build.0 = Release|Win32
{6FB51D67-327E-4A18-BC74-7AA6AAB4C827}.Release|x64.ActiveCfg = Release|x64
{6FB51D67-327E-4A18-BC74-7AA6AAB4C827}.Release|x64.Build.0 = Release|x64
{12336478-A663-42F8-8D52-8D093CD99992}.Debug|Any CPU.ActiveCfg = Debug|x64
{12336478-A663-42F8-8D52-8D093CD99992}.Debug|Win32.ActiveCfg = Debug|x64
{12336478-A663-42F8-8D52-8D093CD99992}.Debug|x64.ActiveCfg = Debug|x64
{12336478-A663-42F8-8D52-8D093CD99992}.Debug|x64.Build.0 = Debug|x64
{12336478-A663-42F8-8D52-8D093CD99992}.Release|Any CPU.ActiveCfg = Release|x64
{12336478-A663-42F8-8D52-8D093CD99992}.Release|Win32.ActiveCfg = Release|x64
{12336478-A663-42F8-8D52-8D093CD99992}.Release|x64.ActiveCfg = Release|x64
{12336478-A663-42F8-8D52-8D093CD99992}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
90 changes: 90 additions & 0 deletions CUDA/CUDA.vcxproj
@@ -0,0 +1,90 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{12336478-A663-42F8-8D52-8D093CD99992}</ProjectGuid>
<RootNamespace>CUDA</RootNamespace>
<WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 9.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<CudaCompile>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<CudaCompile>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<CudaCompile Include="RGBToNV12.cu" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="RGBToNV12.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 9.2.targets" />
</ImportGroup>
</Project>
88 changes: 88 additions & 0 deletions CUDA/RGBToNV12.cu
@@ -0,0 +1,88 @@
#include <stdint.h>

#include "RGBToNV12.h"

__device__ float rgb2y(uchar4 c) {
return 0.257f * c.x + 0.504f * c.y + 0.098f * c.z + 16.0f;
}

__device__ float rgb2u(uchar4 c) {
return -0.148f * c.x - 0.291f * c.y + 0.439f * c.z + 128.0f;
}

__device__ float rgb2v(uchar4 c) {
return 0.439f * c.x - 0.368f * c.y - 0.071f * c.z + 128.0f;
}

texture<uchar4, cudaTextureType2D, cudaReadModeElementType> texRef;

__global__ void RGBA2NV12_kernel(uint8_t *dstImage, size_t destPitch,
uint32_t width, uint32_t height)
{
// Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
int32_t x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
int32_t y = blockIdx.y * (blockDim.y << 1) + (threadIdx.y << 1);

int x1 = x + 1;
int y1 = y + 1;

if (x1 >= width)
return; //x = width - 1;

if (y1 >= height)
return; // y = height - 1;

uchar4 c00 = tex2D(texRef, x, y);
uchar4 c01 = tex2D(texRef, x1, y);
uchar4 c10 = tex2D(texRef, x, y1);
uchar4 c11 = tex2D(texRef, x1, y1);

uint8_t y00 = (uint8_t)(rgb2y(c00) + 0.5f);
uint8_t y01 = (uint8_t)(rgb2y(c01) + 0.5f);
uint8_t y10 = (uint8_t)(rgb2y(c10) + 0.5f);
uint8_t y11 = (uint8_t)(rgb2y(c11) + 0.5f);

uint8_t u = (uint8_t)((rgb2u(c00) + rgb2u(c01) + rgb2u(c10) + rgb2u(c11)) * 0.25f + 0.5f);
uint8_t v = (uint8_t)((rgb2v(c00) + rgb2v(c01) + rgb2v(c10) + rgb2v(c11)) * 0.25f + 0.5f);

dstImage[destPitch * y + x] = y00;
dstImage[destPitch * y + x1] = y01;
dstImage[destPitch * y1 + x] = y10;
dstImage[destPitch * y1 + x1] = y11;

uint32_t chromaOffset = destPitch * height;
int32_t x_chroma = x;
int32_t y_chroma = y >> 1;

dstImage[chromaOffset + destPitch * y_chroma + x_chroma] = u;
dstImage[chromaOffset + destPitch * y_chroma + x_chroma + 1] = v;
}

extern "C"
cudaError_t RGBA2NV12(cudaArray *srcImage,
uint8_t *dstImage, size_t destPitch,
uint32_t width, uint32_t height)
{
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);

// Set texture parameters
texRef.addressMode[0] = cudaAddressModeWrap;
texRef.addressMode[1] = cudaAddressModeWrap;
texRef.filterMode = cudaFilterModePoint;
texRef.normalized = false;

cudaError_t cudaStatus = cudaBindTextureToArray(texRef, srcImage, channelDesc);
if (cudaStatus != cudaSuccess) {
return cudaStatus;
}

dim3 block(32, 16, 1);
dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (2 * block.y - 1)) / (2 * block.y), 1);

RGBA2NV12_kernel<<<grid, block>>>(dstImage, destPitch, width, height);

cudaThreadSynchronize();

cudaStatus = cudaGetLastError();
return cudaStatus;
}
8 changes: 8 additions & 0 deletions CUDA/RGBToNV12.h
@@ -0,0 +1,8 @@
#pragma once

#include <cuda.h>

extern "C"
cudaError_t RGBA2NV12(cudaArray *srcImage,
uint8_t *dstImage, size_t destPitch,
uint32_t width, uint32_t height);
27 changes: 18 additions & 9 deletions alvr_server/CudaConverter.h
Expand Up @@ -8,6 +8,8 @@
#include <cuda_runtime_api.h>
#include <cuda_d3d11_interop.h>

#include <RGBToNV12.h>

#include "Logger.h"

using Microsoft::WRL::ComPtr;
Expand Down Expand Up @@ -39,25 +41,32 @@ class CudaConverter {
return m_cuContext;
}

void Convert(const ComPtr<ID3D11Texture2D> &texture) {
void Convert(const ComPtr<ID3D11Texture2D> &texture, const NvEncInputFrame* encoderInputFrame) {
cudaError cuStatus;

RegisterTexture(texture);

cuStatus = cudaGraphicsMapResources(1, &m_cudaResource, 0);
if (cuStatus != cudaSuccess) {
throw MakeException("cudaGraphicsMapResources failed.");
}

cudaArray *cuArray;
cudaError cuStatus = cudaGraphicsSubResourceGetMappedArray(&cuArray, m_cudaResource, 0, 0);
cuStatus = cudaGraphicsSubResourceGetMappedArray(&cuArray, m_cudaResource, 0, 0);
if (cuStatus != cudaSuccess) {
throw MakeException("cudaGraphicsSubResourceGetMappedArray failed.");
}

// then we want to copy cudaLinearMemory to the D3D texture, via its mapped form : cudaArray
cuStatus = cudaMemcpy2DFromArray(
m_cudaLinearMemory, m_pitch, // dst array
cuArray, // src
0, 0,
m_width, m_height, // extent
cudaMemcpyDeviceToDevice); // kind
cuStatus = RGBA2NV12(cuArray, (uint8_t *)encoderInputFrame->inputPtr, encoderInputFrame->pitch, m_width, m_height);

if (cuStatus != cudaSuccess) {
throw MakeException("cudaMemcpy2DFromArray failed.");
}

cudaGraphicsUnmapResources(1, &m_cudaResource, 0);
if (cuStatus != cudaSuccess) {
throw MakeException("cudaGraphicsUnmapResources failed.");
}
}

private:
Expand Down
2 changes: 1 addition & 1 deletion alvr_server/NvCodecUtils.h
Expand Up @@ -24,7 +24,7 @@ inline bool check(CUresult e, int iLine, const char *szFile) {
if (e != CUDA_SUCCESS) {
const char *szErrName = NULL;
cuGetErrorName(e, &szErrName);
LOG(FATAL) << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile;
Log("CUDA driver API error %s at line %d in file %s", szErrName, iLine, szFile);
return false;
}
return true;
Expand Down
6 changes: 4 additions & 2 deletions alvr_server/alvr_server.cpp
Expand Up @@ -32,7 +32,8 @@
#include "packet_types.h"
#include "resource.h"
#include "Tracking.h"
#include "CudaConverter.h"
#include "CudaConverter.h"
#include "RGBToNV12.h"

HINSTANCE g_hInstance;

Expand Down Expand Up @@ -194,7 +195,7 @@ namespace
{
try {
Log("ConvertRGBToNV12 start");
m_Converter->Convert(pTexture);
m_Converter->Convert(pTexture, encoderInputFrame);
Log("ConvertRGBToNV12 end");
}
catch (NVENCException e) {
Expand Down Expand Up @@ -1374,6 +1375,7 @@ class CServerDriver_DisplayRedirect : public vr::IServerTrackedDeviceProvider
std::shared_ptr<IPCMutex> m_mutex;
};

extern "C" int main_test();
vr::EVRInitError CServerDriver_DisplayRedirect::Init( vr::IVRDriverContext *pContext )
{
VR_INIT_SERVER_DRIVER_CONTEXT( pContext );
Expand Down
19 changes: 11 additions & 8 deletions alvr_server/driver_virtual_display.vcxproj
Expand Up @@ -78,12 +78,12 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_WINDLL;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>ws2_32.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\Win32\cudart_static.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<CustomBuildStep>
<Command>
Expand All @@ -98,12 +98,12 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_WINDLL;NOMINMAX;_WINSOCKAPI_;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>ws2_32.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\x64\cudart_static.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<CustomBuildStep>
<Command>
Expand All @@ -120,14 +120,14 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_WINDLL;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ws2_32.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\Win32\cudart_static.lib;$(CUDA_PATH)\lib\Win32\cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<CustomBuildStep>
<Command>
Expand All @@ -144,14 +144,14 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>../shared;../openvr/headers;include;$(SolutionDir)include;$(CUDA_PATH)/include;$(SolutionDir)CUDA\</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_WINDLL;NOMINMAX;_WINSOCKAPI_;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ws2_32.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ws2_32.lib;$(SolutionDir)$(Platform)\$(Configuration)\CUDA.lib;$(CUDA_PATH)\lib\x64\cudart_static.lib;$(CUDA_PATH)\lib\x64\cuda.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<CustomBuildStep>
<Command>
Expand All @@ -174,6 +174,9 @@
<ClCompile Include="UdpSocket.cpp" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\CUDA\CUDA.vcxproj">
<Project>{12336478-a663-42f8-8d52-8d093cd99992}</Project>
</ProjectReference>
<ProjectReference Include="..\shared\shared.vcxproj">
<Project>{10868996-d864-4e88-8bcb-ba530af64712}</Project>
</ProjectReference>
Expand Down

0 comments on commit 7540851

Please sign in to comment.