Permalink
Browse files

Added register R8, there are 9 registers in total now

  • Loading branch information...
SChernykh committed Feb 11, 2019
1 parent 0a0f9ee commit f1463c9963893da2495dab591b3244cab915d6f1
Showing with 42 additions and 30 deletions.
  1. +2 −1 src/crypto/slow-hash.c
  2. +30 −19 src/crypto/variant4_random_math.h
  3. +10 −10 tests/hash/tests-slow-4.txt
@@ -225,7 +225,7 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
} while (0)

#define VARIANT4_RANDOM_MATH_INIT() \
v4_reg r[8]; \
v4_reg r[9]; \
struct V4_Instruction code[NUM_INSTRUCTIONS_MAX + 1]; \
do if (variant >= 4) \
{ \
@@ -251,6 +251,7 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
V4_REG_LOAD(r + 5, (uint64_t*)(a) + 1); \
V4_REG_LOAD(r + 6, _b); \
V4_REG_LOAD(r + 7, _b1); \
V4_REG_LOAD(r + 8, (uint64_t*)(_b1) + 1); \
\
v4_random_math(code, r); \
} while (0)
@@ -144,16 +144,16 @@ static FORCEINLINE void v4_random_math(const struct V4_Instruction* code, v4_reg
// Generated program can have 60 + a few more (usually 2-3) instructions to achieve required latency
// I've checked all block heights < 10,000,000 and here is the distribution of program sizes:
//
// 60 28495
// 61 106077
// 62 2455855
// 63 5114930
// 64 1020868
// 65 1109026
// 66 151756
// 67 8429
// 68 4477
// 69 87
// 60 27960
// 61 105054
// 62 2452759
// 63 5115997
// 64 1022269
// 65 1109635
// 66 153145
// 67 8550
// 68 4529
// 69 102

// Unroll 70 instructions here
V4_EXEC_10(0); // instructions 0-9
@@ -208,18 +208,22 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_
size_t data_index = sizeof(data);

int code_size;

// There is a small chance (1.8%) that register R8 won't be used in the generated program,
// so we keep track of whether it is used and generate the program again if it is not
bool r8_used;
do {
int latency[8];
int asic_latency[8];
int latency[9];
int asic_latency[9];

// Tracks previous instruction and value of the source operand for registers R0-R3 throughout code execution
// byte 0: current value of the destination register
// byte 1: instruction opcode
// byte 2: current value of the source register
//
// Registers R4-R7 are constant and are treated as having the same value because when we do
// Registers R4-R8 are constant and are treated as having the same value because when we do
// the same operation twice with two constant source registers, it can be optimized into a single operation
uint32_t inst_data[8] = { 0, 1, 2, 3, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF };
uint32_t inst_data[9] = { 0, 1, 2, 3, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF };

bool alu_busy[TOTAL_LATENCY + 1][ALU_COUNT];
bool is_rotation[V4_INSTRUCTION_COUNT];
@@ -238,6 +242,7 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_
code_size = 0;

int total_iterations = 0;
r8_used = false;

// Generate random code to achieve minimal required latency for our abstract CPU
// Try to get this latency for all 4 registers
@@ -281,9 +286,9 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_
// Don't do ADD/SUB/XOR with the same register
if (((opcode == ADD) || (opcode == SUB) || (opcode == XOR)) && (a == b))
{
// a is always < 4, so we don't need to check bounds here
b = a + 4;
src_index = b;
// Use register R8 as source instead
b = 8;
src_index = 8;
}

// Don't do rotation with the same destination twice because it's equal to a single rotation
@@ -363,6 +368,11 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_
code[code_size].src_index = src_index;
code[code_size].C = 0;

if (src_index == 8)
{
r8_used = true;
}

if (opcode == ADD)
{
// ADD instruction is implemented as two 1-cycle instructions on a real CPU, so mark ALU as busy for the next cycle too
@@ -414,8 +424,9 @@ static inline int v4_random_math_init(struct V4_Instruction* code, const uint64_
++code_size;
}

// There is ~99.8% chance that NUM_INSTRUCTIONS_MIN <= code_size <= NUM_INSTRUCTIONS_MAX here, so second iteration is required rarely
} while ((code_size < NUM_INSTRUCTIONS_MIN) || (code_size > NUM_INSTRUCTIONS_MAX));
// There is a ~98.15% chance that the loop condition is false, so this loop executes only 1 iteration most of the time
// It never does more than 4 iterations for any block height < 10,000,000
} while (!r8_used || (code_size < NUM_INSTRUCTIONS_MIN) || (code_size > NUM_INSTRUCTIONS_MAX));

// It's guaranteed that NUM_INSTRUCTIONS_MIN <= code_size <= NUM_INSTRUCTIONS_MAX here
// Add final instruction to stop the interpreter
@@ -1,10 +1,10 @@
9d47bf4c41b7e8e727e681715acb47fa1677cdba9ca7bcb05ad8cc8abd5daa66 5468697320697320612074657374205468697320697320612074657374205468697320697320612074657374 1806260
0d4a495cb844a3ca8ba4edb8e6bcf829ef1c06d9cdea2b62ca46c2a21b8b0a79 4c6f72656d20697073756d20646f6c6f722073697420616d65742c20636f6e73656374657475722061646970697363696e67 1806261
a1d6d848b5c5915fccd2f64cf216c6b1a02cf7c77bc80d8d4e51b419e88ff0dd 656c69742c2073656420646f20656975736d6f642074656d706f7220696e6369646964756e74207574206c61626f7265 1806262
af3a8544a0221a148c2ac90484b19861e3afca33fe17021efb8ad6496b567915 657420646f6c6f7265206d61676e6120616c697175612e20557420656e696d206164206d696e696d2076656e69616d2c 1806263
313399e0963ae8a99dab8af66d343e097dae0c0feb08dbc43ccdafef5515f413 71756973206e6f737472756420657865726369746174696f6e20756c6c616d636f206c61626f726973206e697369 1806264
6021c6ef90bff9ae94a7506d623d3a7a86c1756d655f50dd558f716d64622a34 757420616c697175697020657820656120636f6d6d6f646f20636f6e7365717561742e20447569732061757465 1806265
2b13000535f3db5f9b9b84a65c4351f386cd2cdedebb8c3ad2eab086e6a3fee5 697275726520646f6c6f7220696e20726570726568656e646572697420696e20766f6c7570746174652076656c6974 1806266
fc0e1dad8e895749dc90eb690bc1ba059a1cd772afaaf65a106bf9e5e6b80503 657373652063696c6c756d20646f6c6f726520657520667567696174206e756c6c612070617269617475722e 1806267
b60b0afe144deff7d903ed2d5545e77ebe66a3c51fee7016eeb8fee9eb630c0f 4578636570746575722073696e74206f6363616563617420637570696461746174206e6f6e2070726f6964656e742c 1806268
64774b27e7d5fec862fc4c0c13ac6bf09123b6f05bb0e4b75c97f379a2b3a679 73756e7420696e2063756c706120717569206f666669636961206465736572756e74206d6f6c6c697420616e696d20696420657374206c61626f72756d2e 1806269
47c996e2d6aa453f50b15a6e829a8c6e5070500c08ba2426019510753e31af42 5468697320697320612074657374205468697320697320612074657374205468697320697320612074657374 1806260
eb17f755e8f394ff911603826b0e2a37c3f40a5990693a1be7e39cd5c178f0b4 4c6f72656d20697073756d20646f6c6f722073697420616d65742c20636f6e73656374657475722061646970697363696e67 1806261
f4de6adc61efa498fd4929ed00e88ed8e12caa2907f99cb42442567d3da9daec 656c69742c2073656420646f20656975736d6f642074656d706f7220696e6369646964756e74207574206c61626f7265 1806262
03004aaa1cdbda343fcbc835aaca191b8577c21267dadd0e4e86a57e68614a71 657420646f6c6f7265206d61676e6120616c697175612e20557420656e696d206164206d696e696d2076656e69616d2c 1806263
2081cb5646549b44356f5c81787c529367751bc1cdd5f1ea8c0a333b5e49e220 71756973206e6f737472756420657865726369746174696f6e20756c6c616d636f206c61626f726973206e697369 1806264
653c57f666f6fa1121b82f217485f6fda64ce58bf311d664e92da9119c7d5b95 757420616c697175697020657820656120636f6d6d6f646f20636f6e7365717561742e20447569732061757465 1806265
20c4b9f8ded7fd1e348341ce2b6297e5ac330e588e5f34985446fd20346b69e3 697275726520646f6c6f7220696e20726570726568656e646572697420696e20766f6c7570746174652076656c6974 1806266
82e35ae2f7258fb5cb6b53b332f898ac19c385b49fc35e32ae0c5ab56025f763 657373652063696c6c756d20646f6c6f726520657520667567696174206e756c6c612070617269617475722e 1806267
ddcf95b7d0066668a1d36d4115de1a2bc52dd1f95d94366ec34c8c3c7196e5dd 4578636570746575722073696e74206f6363616563617420637570696461746174206e6f6e2070726f6964656e742c 1806268
882c2bddf05736ab1072c678c3b75661813709a2ac1fd6e861f2dcc65d466b90 73756e7420696e2063756c706120717569206f666669636961206465736572756e74206d6f6c6c697420616e696d20696420657374206c61626f72756d2e 1806269

0 comments on commit f1463c9

Please sign in to comment.