import and clean hsr algo

nimbosa · Oct 9, 2017 · ed27598 · ed27598
1 parent cf18cb6
commit ed27598
Show file tree

Hide file tree

Showing 12 changed files with 761 additions and 0 deletions.
diff --git a/Makefile.am b/Makefile.am
@@ -71,6 +71,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
 			  x11/cuda_x11_luffa512_Cubehash.cu x11/x11evo.cu x11/timetravel.cu x11/bitcore.cu \
 			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
+			  x13/hsr.cu x13/cuda_hsr_sm3.cu x13/sm3.c \
 			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
 			  x15/whirlpool.cu x15/cuda_x15_whirlpool_sm3.cu \
 			  x17/x17.cu x17/hmq17.cu x17/cuda_x17_haval256.cu x17/cuda_x17_sha512.cu \

diff --git a/algos.h b/algos.h
@@ -22,6 +22,7 @@ enum sha_algos {
 	ALGO_GROESTL,
 	ALGO_HEAVY,		/* Heavycoin hash */
 	ALGO_HMQ1725,
+	ALGO_HSR,
 	ALGO_KECCAK,
 	ALGO_JACKPOT,
 	ALGO_JHA,
@@ -88,6 +89,7 @@ static const char *algo_names[] = {
 	"groestl",
 	"heavy",
 	"hmq1725",
+	"hsr",
 	"keccak",
 	"jackpot",
 	"jha",
@@ -163,6 +165,8 @@ static inline int algo_to_int(char* arg)
 			i = ALGO_LUFFA;
 		else if (!strcasecmp("hmq17", arg))
 			i = ALGO_HMQ1725;
+		else if (!strcasecmp("hshare", arg))
+			i = ALGO_HSR;
 		//else if (!strcasecmp("jackpot", arg))
 		//	i = ALGO_JHA;
 		else if (!strcasecmp("lyra2re", arg))

diff --git a/bench.cpp b/bench.cpp
@@ -66,6 +66,7 @@ void algo_free_all(int thr_id)
 	free_groestlcoin(thr_id);
 	free_heavy(thr_id);
 	free_hmq17(thr_id);
+	free_hsr(thr_id);
 	free_jackpot(thr_id);
 	free_jha(thr_id);
 	free_lbry(thr_id);

diff --git a/ccminer.cpp b/ccminer.cpp
@@ -2227,6 +2227,7 @@ static void *miner_thread(void *userdata)
 			case ALGO_HEAVY:
 			case ALGO_JACKPOT:
 			case ALGO_JHA:
+			case ALGO_HSR:
 			case ALGO_LYRA2v2:
 			case ALGO_PHI:
 			case ALGO_S3:
@@ -2361,6 +2362,9 @@ static void *miner_thread(void *userdata)
 		case ALGO_HMQ1725:
 			rc = scanhash_hmq17(thr_id, &work, max_nonce, &hashes_done);
 			break;
+		case ALGO_HSR:
+			rc = scanhash_hsr(thr_id, &work, max_nonce, &hashes_done);
+			break;
 
 		case ALGO_HEAVY:
 			rc = scanhash_heavy(thr_id, &work, max_nonce, &hashes_done, work.maxvote, HEAVYCOIN_BLKHDR_SZ);

diff --git a/ccminer.vcxproj b/ccminer.vcxproj
@@ -331,6 +331,7 @@
     <ClCompile Include="sph\hamsi.c" />
     <ClCompile Include="sph\hamsi_helper.c" />
     <ClCompile Include="sph\whirlpool.c" />
+    <ClCompile Include="x13\sm3.c" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="compat.h" />
@@ -576,6 +577,8 @@
     <CudaCompile Include="x13\cuda_x13_fugue512.cu">
     </CudaCompile>
     <CudaCompile Include="x13\x13.cu" />
+    <CudaCompile Include="x13\cuda_hsr_sm3.cu" />
+    <CudaCompile Include="x13\hsr.cu" />
     <CudaCompile Include="x15\x14.cu" />
     <CudaCompile Include="x15\cuda_x14_shabal512.cu" />
     <CudaCompile Include="x15\cuda_x15_whirlpool.cu" />

diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
@@ -225,6 +225,9 @@
     <ClCompile Include="sph\streebog.c">
       <Filter>Source Files\sph</Filter>
     </ClCompile>
+    <ClCompile Include="x13\sm3.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
     <ClCompile Include="compat\winansi.c">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -727,6 +730,12 @@
     <CudaCompile Include="x13\x13.cu">
       <Filter>Source Files\CUDA\x13</Filter>
     </CudaCompile>
+    <CudaCompile Include="x13\cuda_hsr_sm3.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\hsr.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
     <CudaCompile Include="cuda_checkhash.cu">
       <Filter>Source Files\CUDA</Filter>
     </CudaCompile>

diff --git a/miner.h b/miner.h
@@ -289,6 +289,7 @@ extern int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce,
 extern int scanhash_groestlcoin(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_hmq17(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_heavy(int thr_id,struct work *work, uint32_t max_nonce, unsigned long *hashes_done, uint32_t maxvote, int blocklen);
+extern int scanhash_hsr(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_jha(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_jackpot(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); // quark method
 extern int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
@@ -351,6 +352,7 @@ extern void free_fugue256(int thr_id);
 extern void free_groestlcoin(int thr_id);
 extern void free_heavy(int thr_id);
 extern void free_hmq17(int thr_id);
+extern void free_hsr(int thr_id);
 extern void free_jackpot(int thr_id);
 extern void free_jha(int thr_id);
 extern void free_lbry(int thr_id);
@@ -892,6 +894,7 @@ void fresh_hash(void *state, const void *input);
 void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
 void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
 void hmq17hash(void *output, const void *input);
+void hsr_hash(void *output, const void *input);
 void keccak256_hash(void *state, const void *input);
 void jackpothash(void *state, const void *input);
 void groestlhash(void *state, const void *input);

diff --git a/util.cpp b/util.cpp
@@ -2206,6 +2206,9 @@ void print_hash_tests(void)
 	hmq17hash(&hash[0], &buf[0]);
 	printpfx("hmq1725", hash);
 
+	hsr_hash(&hash[0], &buf[0]);
+        printpfx("hsr", hash);
+
 	jha_hash(&hash[0], &buf[0]);
 	printpfx("jha", hash);
 

diff --git a/x13/cuda_hsr_sm3.cu b/x13/cuda_hsr_sm3.cu
@@ -0,0 +1,139 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+#include <cuda_helper.h>
+#include <miner.h>
+
+// Boolean functions used by the SM3 rounds below: F is the three-way XOR
+// (applied to both register triples while i < 16), FF is the bitwise majority
+// and GG the bitwise choice "x ? y : z" (both applied for i >= 16).
+#define  F(x, y, z) (((x) ^ (y) ^ (z)))
+#define FF(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
+#define GG(x, y, z) ((z)  ^ ((x) & ((y) ^ (z))))
+
+// Permutations P0 / P1. The argument and the whole expansion are
+// parenthesized so the macros keep their meaning next to operators of a
+// different precedence or when invoked with a compound expression.
+// The argument is expanded three times, so it must be free of side effects.
+#define P0(x) ((x) ^ ROTL32((x),  9) ^ ROTL32((x), 17))
+#define P1(x) ((x) ^ ROTL32((x), 15) ^ ROTL32((x), 23))
+
+/**
+ * SM3 compression step: mixes one 16-word message block into the state.
+ *
+ * digest  in/out, 8-word chaining value; on return every word has been XORed
+ *         with the matching working register.
+ * pblock  16 input words; each one is passed through cuda_swab32() before it
+ *         enters the message schedule.
+ */
+static __forceinline__ __device__
+void sm3_compress2(uint32_t digest[8], const uint32_t pblock[16])
+{
+	uint32_t tt1, tt2, i, t, ss1, ss2, x, y;
+	// expanded message schedule; the round loop reads w[i] and w[i + 4],
+	// hence 64 + 4 entries
+	uint32_t w[68];
+	uint32_t a = digest[0];
+	uint32_t b = digest[1];
+	uint32_t c = digest[2];
+	uint32_t d = digest[3];
+	uint32_t e = digest[4];
+	uint32_t f = digest[5];
+	uint32_t g = digest[6];
+	uint32_t h = digest[7];
+
+	// first 16 schedule words: the byte-swapped input block
+	#pragma unroll
+	for (i = 0; i<16; i++) {
+		w[i] = cuda_swab32(pblock[i]);
+	}
+
+	// message expansion for the remaining 52 words
+	for (i = 16; i<68; i++) {
+		x = ROTL32(w[i - 3], 15);
+		y = ROTL32(w[i - 13], 7);
+
+		x ^= w[i - 16];
+		x ^= w[i - 9];
+		y ^= w[i - 6];
+
+		w[i] = P1(x) ^ y;
+	}
+
+	// 64 rounds
+	for (i = 0; i<64; i++) {
+
+		// round constant: one value for the first 16 rounds, another afterwards
+		t = (i < 16) ? 0x79cc4519 : 0x7a879d8a;
+
+		// SM3 rotates the constant by (round mod 32). Masking keeps the count
+		// in 0..31 so the result does not depend on how ROTL32 treats counts
+		// of 32 or more.
+		// NOTE(review): ROTL32 is defined outside this file (cuda_helper.h is
+		// assumed) - a wrapping rotate gives the same value with or without
+		// the mask.
+		const uint32_t rot = i & 31;
+
+		ss2 = ROTL32(a, 12);
+		ss1 = ROTL32(ss2 + e + ROTL32(t, rot), 7);
+		ss2 ^= ss1;
+
+		tt1 = d + ss2 + (w[i] ^ w[i + 4]);
+		tt2 = h + ss1 + w[i];
+
+		if (i < 16) {
+			tt1 += F(a, b, c);
+			tt2 += F(e, f, g);
+		}
+		else {
+			tt1 += FF(a, b, c);
+			tt2 += GG(e, f, g);
+		}
+		d = c;
+		c = ROTL32(b, 9);
+		b = a;
+		a = tt1;
+		h = g;
+		g = ROTL32(f, 19);
+		f = e;
+		e = P0(tt2);
+	}
+
+	// feed-forward: XOR the working registers back into the chaining value
+	digest[0] ^= a;
+	digest[1] ^= b;
+	digest[2] ^= c;
+	digest[3] ^= d;
+	digest[4] ^= e;
+	digest[5] ^= f;
+	digest[6] ^= g;
+	digest[7] ^= h;
+}
+
+/***************************************************/
+/**
+ * SM3 kernel, one thread per 16-word (64-byte) record of g_hash, in place.
+ *
+ * threads  number of records; threads with a global index >= threads do
+ *          nothing.
+ * g_hash   buffer indexed as 16 words per thread. The record is read as the
+ *          message, then words 0..7 receive the byte-swapped digest and
+ *          words 8..15 are set to zero.
+ *
+ * Uses a 1D launch: the global index is built from blockIdx.x, blockDim.x
+ * and threadIdx.x only.
+ */
+__global__
+void sm3_gpu_hash_64(const uint32_t threads, uint32_t *g_hash)
+{
+	const uint32_t tid = (blockDim.x * blockIdx.x + threadIdx.x);
+
+	// tail guard: the grid may be larger than the amount of work
+	if (tid >= threads)
+		return;
+
+	// this thread's record: 16 consecutive words
+	const size_t slotIndex = tid;
+	uint32_t *slot = &g_hash[slotIndex << 4];
+
+	// initial chaining value
+	uint32_t state[8] = {
+		0x7380166F, 0x4914B2B9, 0x172442D7, 0xDA8A0600,
+		0xA96F30BC, 0x163138AA, 0xE38DEE4D, 0xB0FB0E4E
+	};
+
+	// first block: the 64 bytes of input
+	sm3_compress2(state, slot);
+
+	// second block: padding. sm3_compress2 byte-swaps every word it reads, so
+	// 0x80 here ends up as the leading 1 bit of the padding.
+	uint32_t pad[16];
+	pad[0] = 0x80;
+
+	#pragma unroll
+	for (int k = 1; k < 14; k++)
+		pad[k] = 0;
+
+	// message length in bits (512), pre-swapped because sm3_compress2 swaps
+	// the words again
+	pad[14] = cuda_swab32(1 >> 23);
+	pad[15] = cuda_swab32((1 << 9) + (0 << 3));
+
+	sm3_compress2(state, pad);
+
+	// write back: digest in the lower half, zeros in the upper half
+	for (int k = 0; k < 16; k++) {
+		if (k < 8)
+			slot[k] = cuda_swab32(state[k]);
+		else
+			slot[k] = 0;
+	}
+}
+
+/**
+ * Host launcher for sm3_gpu_hash_64.
+ *
+ * Launches blocks of 256 threads, rounding the block count up so that at
+ * least `threads` threads exist, with no explicit stream argument.
+ * No synchronization is done here (the MyStreamSynchronize call stays
+ * disabled), and thr_id / order are not used by the active code.
+ */
+__host__
+void sm3_cuda_hash_64(int thr_id, uint32_t threads, uint32_t *g_hash, int order)
+{
+	const uint32_t tpb = 256;
+	// ceil(threads / tpb)
+	const uint32_t nblocks = (threads + tpb - 1) / tpb;
+
+	dim3 grid(nblocks);
+	dim3 block(tpb);
+
+	sm3_gpu_hash_64 <<<grid, block>>>(threads, g_hash);
+	//MyStreamSynchronize(NULL, order, thr_id);
+}