Skip to content

Commit

Permalink
nanashi r10 with proper utf8
Browse files Browse the repository at this point in the history
  • Loading branch information
tpruvot committed Aug 3, 2016
1 parent 4a8e3d9 commit f262850
Show file tree
Hide file tree
Showing 23 changed files with 5,150 additions and 1,912 deletions.
661 changes: 566 additions & 95 deletions Algo256/cuda_blake256.cu

Large diffs are not rendered by default.

304 changes: 203 additions & 101 deletions Algo256/cuda_bmw256.cu

Large diffs are not rendered by default.

482 changes: 273 additions & 209 deletions Algo256/cuda_cubehash256.cu

Large diffs are not rendered by default.

451 changes: 358 additions & 93 deletions Algo256/cuda_skein256.cu

Large diffs are not rendered by default.

582 changes: 568 additions & 14 deletions ccminer.cpp

Large diffs are not rendered by default.

35 changes: 15 additions & 20 deletions ccminer.vcxproj
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -41,10 +41,7 @@
<LinkIncremental>false</LinkIncremental> <LinkIncremental>false</LinkIncremental>
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" Condition="'$(Platform)'=='Win32'"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.props" />
</ImportGroup>
<ImportGroup Label="ExtensionSettings" Condition="'$(Platform)'=='x64'">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.props" /> <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
Expand Down Expand Up @@ -83,10 +80,10 @@
<CudaCompile> <CudaCompile>
<CInterleavedPTX>false</CInterleavedPTX> <CInterleavedPTX>false</CInterleavedPTX>
<GenerateLineInfo>true</GenerateLineInfo> <GenerateLineInfo>true</GenerateLineInfo>
<MaxRegCount>80</MaxRegCount> <MaxRegCount>255</MaxRegCount>
<PtxAsOptionV>true</PtxAsOptionV> <PtxAsOptionV>true</PtxAsOptionV>
<Keep>true</Keep> <Keep>true</Keep>
<CodeGeneration>compute_50,sm_50</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_35,sm_35;compute_30,sm_30;compute_20,sm_20</CodeGeneration>
<Include>$(NVTOOLSEXT_PATH)\include;..\..\..\Common\C99</Include> <Include>$(NVTOOLSEXT_PATH)\include;..\..\..\Common\C99</Include>
</CudaCompile> </CudaCompile>
</ItemDefinitionGroup> </ItemDefinitionGroup>
Expand Down Expand Up @@ -115,15 +112,16 @@
<CudaCompile> <CudaCompile>
<CInterleavedPTX>false</CInterleavedPTX> <CInterleavedPTX>false</CInterleavedPTX>
<GenerateLineInfo>true</GenerateLineInfo> <GenerateLineInfo>true</GenerateLineInfo>
<MaxRegCount>80</MaxRegCount> <MaxRegCount>255</MaxRegCount>
<PtxAsOptionV>true</PtxAsOptionV> <PtxAsOptionV>true</PtxAsOptionV>
<Keep>true</Keep> <Keep>true</Keep>
<CodeGeneration>compute_50,sm_50</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_35,sm_35;compute_30,sm_30;compute_20,sm_20</CodeGeneration>
<Include>$(NVTOOLSEXT_PATH)\include;..\..\..\Common\C99</Include> <Include>$(NVTOOLSEXT_PATH)\include;..\..\..\Common\C99</Include>
<TargetMachinePlatform>64</TargetMachinePlatform> <TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile> </CudaCompile>
<CudaLink> <CudaLink>
<PerformDeviceLink>false</PerformDeviceLink> <PerformDeviceLink>false</PerformDeviceLink>
<Optimization>O3</Optimization>
</CudaLink> </CudaLink>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
Expand Down Expand Up @@ -158,16 +156,16 @@
</Link> </Link>
<CudaCompile> <CudaCompile>
<CInterleavedPTX>false</CInterleavedPTX> <CInterleavedPTX>false</CInterleavedPTX>
<MaxRegCount>80</MaxRegCount> <MaxRegCount>255</MaxRegCount>
<PtxAsOptionV>true</PtxAsOptionV> <PtxAsOptionV>true</PtxAsOptionV>
<Keep>true</Keep> <Keep>true</Keep>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_30,sm_30;compute_20,sm_21</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_35,sm_35;compute_30,sm_30;compute_20,sm_20</CodeGeneration>
<AdditionalOptions>--ptxas-options="-O2" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions>--ptxas-options="-O2" %(AdditionalOptions)</AdditionalOptions>
<Optimization>O2</Optimization> <Optimization>O3</Optimization>
</CudaCompile> </CudaCompile>
<CudaLink> <CudaLink>
<GPUDebugInfo>false</GPUDebugInfo> <GPUDebugInfo>false</GPUDebugInfo>
<Optimization>O3</Optimization> <Optimization>O2</Optimization>
</CudaLink> </CudaLink>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
Expand Down Expand Up @@ -201,10 +199,10 @@
</Link> </Link>
<CudaCompile> <CudaCompile>
<CInterleavedPTX>false</CInterleavedPTX> <CInterleavedPTX>false</CInterleavedPTX>
<MaxRegCount>80</MaxRegCount> <MaxRegCount>255</MaxRegCount>
<PtxAsOptionV>true</PtxAsOptionV> <PtxAsOptionV>true</PtxAsOptionV>
<Keep>true</Keep> <Keep>true</Keep>
<CodeGeneration>compute_52,sm_52;compute_50,sm_50;compute_35,sm_35;compute_30,sm_30;compute_20,sm_21</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_35,sm_35;compute_30,sm_30;compute_20,sm_20</CodeGeneration>
<Include>$(NVTOOLSEXT_PATH)\include;..\..\..\Common\C99</Include> <Include>$(NVTOOLSEXT_PATH)\include;..\..\..\Common\C99</Include>
<Optimization>O3</Optimization> <Optimization>O3</Optimization>
<TargetMachinePlatform>64</TargetMachinePlatform> <TargetMachinePlatform>64</TargetMachinePlatform>
Expand Down Expand Up @@ -250,6 +248,7 @@
<ClCompile Include="lyra2\Lyra2.c" /> <ClCompile Include="lyra2\Lyra2.c" />
<ClCompile Include="lyra2\Sponge.c" /> <ClCompile Include="lyra2\Sponge.c" />
<ClInclude Include="lyra2\cuda_lyra2_sm2.cuh" /> <ClInclude Include="lyra2\cuda_lyra2_sm2.cuh" />
<ClInclude Include="lyra2\cuda_lyra2_sm5.cuh" />
<ClInclude Include="neoscrypt\neoscrypt.h" /> <ClInclude Include="neoscrypt\neoscrypt.h" />
<ClCompile Include="neoscrypt\neoscrypt.cpp" /> <ClCompile Include="neoscrypt\neoscrypt.cpp" />
<ClCompile Include="neoscrypt\neoscrypt-cpu.c" /> <ClCompile Include="neoscrypt\neoscrypt-cpu.c" />
Expand Down Expand Up @@ -347,7 +346,6 @@
<ClInclude Include="uint256.h" /> <ClInclude Include="uint256.h" />
<ClInclude Include="lyra2\Lyra2.h" /> <ClInclude Include="lyra2\Lyra2.h" />
<ClInclude Include="lyra2\Sponge.h" /> <ClInclude Include="lyra2\Sponge.h" />
<ClInclude Include="lyra2\cuda_lyra2v2_sm3.cuh" />
<ClInclude Include="quark\groestl_transf_quad.h" /> <ClInclude Include="quark\groestl_transf_quad.h" />
<ClInclude Include="quark\groestl_functions_quad.h" /> <ClInclude Include="quark\groestl_functions_quad.h" />
<ClInclude Include="quark\cuda_quark.h" /> <ClInclude Include="quark\cuda_quark.h" />
Expand Down Expand Up @@ -527,10 +525,7 @@
<Text Include="README.txt" /> <Text Include="README.txt" />
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" Condition="'$(Platform)'=='Win32'"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
</ImportGroup>
<ImportGroup Label="ExtensionTargets" Condition="'$(Platform)'=='x64'">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.targets" /> <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.targets" />
</ImportGroup> </ImportGroup>
<!-- Copy the required dlls --> <!-- Copy the required dlls -->
Expand All @@ -540,4 +535,4 @@
<Target Name="AfterClean"> <Target Name="AfterClean">
<Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" /> <Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" />
</Target> </Target>
</Project> </Project>
8 changes: 4 additions & 4 deletions ccminer.vcxproj.filters
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -437,9 +437,6 @@
<ClInclude Include="bignum.hpp"> <ClInclude Include="bignum.hpp">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="lyra2\cuda_lyra2v2_sm3.cuh">
<Filter>Source Files\CUDA\lyra2</Filter>
</ClInclude>
<ClInclude Include="lyra2\cuda_lyra2_sm2.cuh"> <ClInclude Include="lyra2\cuda_lyra2_sm2.cuh">
<Filter>Source Files\CUDA\lyra2</Filter> <Filter>Source Files\CUDA\lyra2</Filter>
</ClInclude> </ClInclude>
Expand All @@ -455,6 +452,9 @@
<ClInclude Include="x11\cuda_x11_simd512_sm2.cuh"> <ClInclude Include="x11\cuda_x11_simd512_sm2.cuh">
<Filter>Source Files\CUDA\x11</Filter> <Filter>Source Files\CUDA\x11</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="lyra2\cuda_lyra2_sm5.cuh">
<Filter>Source Files\CUDA\lyra2</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CudaCompile Include="cuda.cpp"> <CudaCompile Include="cuda.cpp">
Expand Down Expand Up @@ -728,4 +728,4 @@
<Filter>Ressources</Filter> <Filter>Ressources</Filter>
</Text> </Text>
</ItemGroup> </ItemGroup>
</Project> </Project>
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Original file line Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([ccminer], [1.7.6], [], [ccminer], [http://github.com/tpruvot/ccminer]) AC_INIT([ccminer], [1.7.6-r10], [], [ccminer], [http://github.com/tpruvot/ccminer])


AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM
Expand Down
30 changes: 21 additions & 9 deletions cuda_helper.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ __device__ __forceinline__ uint64_t REPLACE_LODWORD(const uint64_t &x, const uin
return (x & 0xFFFFFFFF00000000ULL) | ((uint64_t)y); return (x & 0xFFFFFFFF00000000ULL) | ((uint64_t)y);
} }


// Endian Drehung für 32 Bit Typen
#ifdef __CUDA_ARCH__ #ifdef __CUDA_ARCH__
__device__ __forceinline__ uint32_t cuda_swab32(uint32_t x) __device__ __forceinline__ uint32_t cuda_swab32(uint32_t x)
{ {
Expand Down Expand Up @@ -471,6 +470,15 @@ static __host__ __device__ __forceinline__ uint64_t devectorize(uint2 v) {
#endif #endif
} }


/**
 * XOR two uint2 values component-wise and return the result with its
 * two 32-bit components exchanged.
 *
 * @param u first operand
 * @param v second operand
 * @return uint2 whose .x is (u.y ^ v.y) and whose .y is (u.x ^ v.x)
 */
static __device__ __forceinline__ uint2 eorswap32(uint2 u, uint2 v)
{
	uint2 swapped;
	// The .y lanes of the inputs feed the .x lane of the output...
	swapped.x = u.y ^ v.y;
	// ...and the .x lanes of the inputs feed the .y lane of the output.
	swapped.y = u.x ^ v.x;
	return swapped;
}


/** /**
* uint2 direct ops by c++ operator definitions * uint2 direct ops by c++ operator definitions
*/ */
Expand Down Expand Up @@ -561,11 +569,9 @@ uint2 ROR2(const uint2 a, const int offset)
return result; return result;
} }


__device__ __forceinline__ #if __CUDA_ARCH__ >= 350
uint2 ROL2(const uint2 a, const int offset) __inline__ __device__ uint2 ROL2(const uint2 a, const int offset) {
{
uint2 result; uint2 result;
#if __CUDA_ARCH__ > 300
if (offset >= 32) { if (offset >= 32) {
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset)); asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset));
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset)); asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset));
Expand All @@ -574,14 +580,20 @@ uint2 ROL2(const uint2 a, const int offset)
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset)); asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset));
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset)); asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset));
} }
return result;
}
#else #else
if (!offset) __inline__ __device__ uint2 ROL2(const uint2 v, const int n)
result = a; {
uint2 result;
if (!n)
result = v;
else else
result = ROR2(a, 64 - offset); result = ROR2(v, 64 - n);
#endif
return result; return result;
} }
#endif


__device__ __forceinline__ __device__ __forceinline__
uint2 SWAPUINT2(uint2 value) uint2 SWAPUINT2(uint2 value)
Expand Down
Loading

0 comments on commit f262850

Please sign in to comment.