From 6d83160f7a047bccd576e45a454820ca02968e27 Mon Sep 17 00:00:00 2001 From: Pau Kaifler Date: Tue, 15 Dec 2020 15:42:16 +0100 Subject: [PATCH 1/6] benchmarks: Implement SM3 --- benchmarks/Makefile | 4 + benchmarks/sm3/api_sm3.h | 20 ++++ benchmarks/sm3/reference/Makefile.in | 5 + benchmarks/sm3/reference/sm3.c | 121 ++++++++++++++++++++++ benchmarks/sm3/zscrypto_rv32/Makefile.in | 13 +++ benchmarks/sm3/zscrypto_rv32/sm3.c | 122 +++++++++++++++++++++++ benchmarks/sm3/zscrypto_rv64/Makefile.in | 13 +++ benchmarks/sm3/zscrypto_rv64/sm3.c | 122 +++++++++++++++++++++++ benchmarks/test/Makefile.in | 6 ++ 9 files changed, 426 insertions(+) create mode 100644 benchmarks/sm3/api_sm3.h create mode 100644 benchmarks/sm3/reference/Makefile.in create mode 100644 benchmarks/sm3/reference/sm3.c create mode 100644 benchmarks/sm3/zscrypto_rv32/Makefile.in create mode 100644 benchmarks/sm3/zscrypto_rv32/sm3.c create mode 100644 benchmarks/sm3/zscrypto_rv64/Makefile.in create mode 100644 benchmarks/sm3/zscrypto_rv64/sm3.c diff --git a/benchmarks/Makefile b/benchmarks/Makefile index b17d74f4..4c3960c0 100644 --- a/benchmarks/Makefile +++ b/benchmarks/Makefile @@ -40,6 +40,10 @@ include sha512/zscrypto_rv64/Makefile.in include sha3/reference/Makefile.in include sha3/zscrypto_rv64/Makefile.in +include sm3/reference/Makefile.in +include sm3/zscrypto_rv32/Makefile.in +include sm3/zscrypto_rv64/Makefile.in + include permutation/Makefile.in include test/Makefile.in diff --git a/benchmarks/sm3/api_sm3.h b/benchmarks/sm3/api_sm3.h new file mode 100644 index 00000000..a1110dd2 --- /dev/null +++ b/benchmarks/sm3/api_sm3.h @@ -0,0 +1,20 @@ + +/*! +@defgroup crypto_hash_sm3 Crypto Hash SM3 +@{ +*/ + +#include +#include + +#include "riscvcrypto/share/util.h" + +#ifndef __API_SM3__ +#define __API_SM3__ + +// Hashes `message` with `len` bytes with SM3 and stores it to `hash` +void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len); + +//! 
@} + +#endif // __API_SM3__ diff --git a/benchmarks/sm3/reference/Makefile.in b/benchmarks/sm3/reference/Makefile.in new file mode 100644 index 00000000..1af21b79 --- /dev/null +++ b/benchmarks/sm3/reference/Makefile.in @@ -0,0 +1,5 @@ + +HASH_SM3_REF_FILES = \ + sm3/reference/sm3.c + +$(eval $(call add_lib_target,sm3_reference,$(HASH_SM3_REF_FILES))) diff --git a/benchmarks/sm3/reference/sm3.c b/benchmarks/sm3/reference/sm3.c new file mode 100644 index 00000000..d949d1ad --- /dev/null +++ b/benchmarks/sm3/reference/sm3.c @@ -0,0 +1,121 @@ + +#include +#include + +#include "riscvcrypto/sm3/api_sm3.h" + +// The block size in bytes +#define SM3_BLOCK_SIZE (16 * sizeof(uint32_t)) + +// Reverses the byte order of `V` +#define REVERSE_BITS_32(V) \ + (((V & 0x000000FF) << 24) | (((V)&0x0000FF00) << 8) | \ + (((V)&0x00FF0000) >> 8) | (((V)&0xFF000000) >> 24)) + +// Rotates `V` by `N` bits to the left +#define SM3_ROTATE_32(V, N) (((V) << (N)) | ((V) >> (32 - (N)))) + +// The two permutation functions +#define SM3_P0(X) ((X) ^ SM3_ROTATE_32((X), 9) ^ SM3_ROTATE_32((X), 17)) +#define SM3_P1(X) ((X) ^ SM3_ROTATE_32((X), 15) ^ SM3_ROTATE_32((X), 23)) + +// Expands the state `s` to `w` +static void sm3_expand(uint32_t w[68], uint32_t s[24]) { + for (int i = 0; i < 16; ++i) { + w[i] = REVERSE_BITS_32(s[i + 8]); + } + + for (int i = 16; i < 68; ++i) { + w[i] = SM3_P1(w[i - 16] ^ w[i - 9] ^ SM3_ROTATE_32(w[i - 3], 15)) ^ + SM3_ROTATE_32(w[i - 13], 7) ^ w[i - 6]; + } +} + +// Compresses `s` in place +static void sm3_compress(uint32_t s[24]) { + uint32_t w[68]; + sm3_expand(w, s); + + // The IV and iteration state + uint32_t x[8]; + memcpy(x, s, 8 * sizeof(uint32_t)); + + // The state update transformation below uses and modifies `x` + // depending on the expansion `w` and the current iteration `i` + for (int i = 0; i < 64; ++i) { + // The round constant `t` provides additional randomness + uint32_t t = (i < 16) ? 
0x79CC4519 : 0x7A879D8A; + uint32_t rot = SM3_ROTATE_32(x[0], 12); + uint32_t ss1 = SM3_ROTATE_32(rot + x[4] + SM3_ROTATE_32(t, i % 32), 7); + uint32_t ss2 = ss1 ^ rot; + uint32_t w_i = w[i] ^ w[i + 4]; + + uint32_t tt1, tt2; + if (i < 16) { + tt1 = (x[0] ^ x[1] ^ x[2]) + x[3] + ss2 + w_i; + tt2 = (x[4] ^ x[5] ^ x[6]) + x[7] + ss1 + w[i]; + } else { + tt1 = ((x[0] & x[1]) | (x[0] & x[2]) | (x[1] & x[2])) + x[3] + ss2 + w_i; + tt2 = ((x[4] & x[5]) | (~x[4] & x[6])) + x[7] + ss1 + w[i]; + } + + x[3] = x[2]; + x[2] = SM3_ROTATE_32(x[1], 9); + x[1] = x[0]; + x[0] = tt1; + x[7] = x[6]; + x[6] = SM3_ROTATE_32(x[5], 19); + x[5] = x[4]; + x[4] = SM3_P0(tt2); + } + + // Xor `s` with `x` + for (int i = 0; i < 8; ++i) { + s[i] ^= x[i]; + } +} + +// Hashes `message` with `len` bytes with SM3 and stores it to `hash` +void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { + uint32_t s[8 + SM3_BLOCK_SIZE] = { + 0x7380166F, 0x4914B2B9, 0x172442D7, 0xDA8A0600, + 0xA96F30BC, 0x163138AA, 0xE38DEE4D, 0xB0FB0E4E, + }; + uint8_t *b = (uint8_t *)&s[8]; + const uint8_t *m = message; + size_t remaining = len; + + // Hash complete blocks first + while (remaining >= SM3_BLOCK_SIZE) { + memcpy(&s[8], m, SM3_BLOCK_SIZE); + sm3_compress(s); + remaining -= SM3_BLOCK_SIZE; + m += SM3_BLOCK_SIZE; + } + + // Hash the last block with padding + memcpy(b, m, remaining); + // Append bit 1 after the message + b[remaining] = 0b10000000; + ++remaining; + if (remaining > SM3_BLOCK_SIZE - sizeof(uint64_t)) { + sm3_compress(s); + remaining = 0; + } + + // Pad everything between the message and the length with zeros + memset(&b[remaining], 0x00, SM3_BLOCK_SIZE - 8 - remaining); + // Append the length of the message in bits + uint64_t bitlen = 8 * (uint64_t)len; + s[22] = REVERSE_BITS_32((uint32_t)(bitlen >> 32)); + s[23] = REVERSE_BITS_32((uint32_t)bitlen); + sm3_compress(s); + + // stores `s` in `hash` in big-endian + for (size_t i = 0; i < 8; ++i) { + hash[i * 4 + 0] = (uint8_t)(s[i] >> 24); 
+ hash[i * 4 + 1] = (uint8_t)(s[i] >> 16); + hash[i * 4 + 2] = (uint8_t)(s[i] >> 8); + hash[i * 4 + 3] = (uint8_t)(s[i] >> 0); + } +} diff --git a/benchmarks/sm3/zscrypto_rv32/Makefile.in b/benchmarks/sm3/zscrypto_rv32/Makefile.in new file mode 100644 index 00000000..0ef6091b --- /dev/null +++ b/benchmarks/sm3/zscrypto_rv32/Makefile.in @@ -0,0 +1,13 @@ + +ifeq ($(ZSCRYPTO),1) + +ifeq ($(XLEN),32) + +HASH_SM3_ZSCRYPTO_RV32_FILES = \ + sm3/zscrypto_rv32/sm3.c + +$(eval $(call add_lib_target,sm3_zscrypto_rv32,$(HASH_SM3_ZSCRYPTO_RV32_FILES))) + +endif + +endif diff --git a/benchmarks/sm3/zscrypto_rv32/sm3.c b/benchmarks/sm3/zscrypto_rv32/sm3.c new file mode 100644 index 00000000..3925c320 --- /dev/null +++ b/benchmarks/sm3/zscrypto_rv32/sm3.c @@ -0,0 +1,122 @@ + +#include +#include + +#include "riscvcrypto/sm3/api_sm3.h" +#include "riscvcrypto/share/riscv-crypto-intrinsics.h" + +// The block size in bytes +#define SM3_BLOCK_SIZE (16 * sizeof(uint32_t)) + +// Reverses the byte order of `V` +#define REVERSE_BITS_32(V) \ + (((V & 0x000000FF) << 24) | (((V)&0x0000FF00) << 8) | \ + (((V)&0x00FF0000) >> 8) | (((V)&0xFF000000) >> 24)) + +// Rotates `V` by `N` bits to the left +#define SM3_ROTATE_32(V, N) (((V) << (N)) | ((V) >> (32 - (N)))) + +// The two permutation functions +#define SM3_P0(X) _sm3p0((X)) +#define SM3_P1(X) _sm3p1((X)) + +// Expands the state `s` to `w` +static void sm3_expand(uint32_t w[68], uint32_t s[24]) { + for (int i = 0; i < 16; ++i) { + w[i] = REVERSE_BITS_32(s[i + 8]); + } + + for (int i = 16; i < 68; ++i) { + w[i] = SM3_P1(w[i - 16] ^ w[i - 9] ^ SM3_ROTATE_32(w[i - 3], 15)) ^ + SM3_ROTATE_32(w[i - 13], 7) ^ w[i - 6]; + } +} + +// Compresses `s` in place +static void sm3_compress(uint32_t s[24]) { + uint32_t w[68]; + sm3_expand(w, s); + + // The IV and iteration state + uint32_t x[8]; + memcpy(x, s, 8 * sizeof(uint32_t)); + + // The state update transformation below uses and modifies `x` + // depending on the expansion `w` and the current iteration 
`i` + for (int i = 0; i < 64; ++i) { + // The round constant `t` provides additional randomness + uint32_t t = (i < 16) ? 0x79CC4519 : 0x7A879D8A; + uint32_t rot = SM3_ROTATE_32(x[0], 12); + uint32_t ss1 = SM3_ROTATE_32(rot + x[4] + SM3_ROTATE_32(t, i % 32), 7); + uint32_t ss2 = ss1 ^ rot; + uint32_t w_i = w[i] ^ w[i + 4]; + + uint32_t tt1, tt2; + if (i < 16) { + tt1 = (x[0] ^ x[1] ^ x[2]) + x[3] + ss2 + w_i; + tt2 = (x[4] ^ x[5] ^ x[6]) + x[7] + ss1 + w[i]; + } else { + tt1 = ((x[0] & x[1]) | (x[0] & x[2]) | (x[1] & x[2])) + x[3] + ss2 + w_i; + tt2 = ((x[4] & x[5]) | (~x[4] & x[6])) + x[7] + ss1 + w[i]; + } + + x[3] = x[2]; + x[2] = SM3_ROTATE_32(x[1], 9); + x[1] = x[0]; + x[0] = tt1; + x[7] = x[6]; + x[6] = SM3_ROTATE_32(x[5], 19); + x[5] = x[4]; + x[4] = SM3_P0(tt2); + } + + // Xor `s` with `x` + for (int i = 0; i < 8; ++i) { + s[i] ^= x[i]; + } +} + +// Hashes `message` with `len` bytes with SM3 and stores it to `hash` +void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { + uint32_t s[8 + SM3_BLOCK_SIZE] = { + 0x7380166F, 0x4914B2B9, 0x172442D7, 0xDA8A0600, + 0xA96F30BC, 0x163138AA, 0xE38DEE4D, 0xB0FB0E4E, + }; + uint8_t *b = (uint8_t *)&s[8]; + const uint8_t *m = message; + size_t remaining = len; + + // Hash complete blocks first + while (remaining >= SM3_BLOCK_SIZE) { + memcpy(&s[8], m, SM3_BLOCK_SIZE); + sm3_compress(s); + remaining -= SM3_BLOCK_SIZE; + m += SM3_BLOCK_SIZE; + } + + // Hash the last block with padding + memcpy(b, m, remaining); + // Append bit 1 after the message + b[remaining] = 0b10000000; + ++remaining; + if (remaining > SM3_BLOCK_SIZE - sizeof(uint64_t)) { + sm3_compress(s); + remaining = 0; + } + + // Pad everything between the message and the length with zeros + memset(&b[remaining], 0x00, SM3_BLOCK_SIZE - 8 - remaining); + // Append the length of the message in bits + uint64_t bitlen = 8 * (uint64_t)len; + s[22] = REVERSE_BITS_32((uint32_t)(bitlen >> 32)); + s[23] = REVERSE_BITS_32((uint32_t)bitlen); + 
sm3_compress(s); + + // stores `s` in `hash` in big-endian + for (size_t i = 0; i < 8; ++i) { + hash[i * 4 + 0] = (uint8_t)(s[i] >> 24); + hash[i * 4 + 1] = (uint8_t)(s[i] >> 16); + hash[i * 4 + 2] = (uint8_t)(s[i] >> 8); + hash[i * 4 + 3] = (uint8_t)(s[i] >> 0); + } +} diff --git a/benchmarks/sm3/zscrypto_rv64/Makefile.in b/benchmarks/sm3/zscrypto_rv64/Makefile.in new file mode 100644 index 00000000..eedab1d3 --- /dev/null +++ b/benchmarks/sm3/zscrypto_rv64/Makefile.in @@ -0,0 +1,13 @@ + +ifeq ($(ZSCRYPTO),1) + +ifeq ($(XLEN),64) + +HASH_SM3_ZSCRYPTO_RV64_FILES = \ + sm3/zscrypto_rv64/sm3.c + +$(eval $(call add_lib_target,sm3_zscrypto_rv64,$(HASH_SM3_ZSCRYPTO_RV64_FILES))) + +endif + +endif diff --git a/benchmarks/sm3/zscrypto_rv64/sm3.c b/benchmarks/sm3/zscrypto_rv64/sm3.c new file mode 100644 index 00000000..3925c320 --- /dev/null +++ b/benchmarks/sm3/zscrypto_rv64/sm3.c @@ -0,0 +1,122 @@ + +#include +#include + +#include "riscvcrypto/sm3/api_sm3.h" +#include "riscvcrypto/share/riscv-crypto-intrinsics.h" + +// The block size in bytes +#define SM3_BLOCK_SIZE (16 * sizeof(uint32_t)) + +// Reverses the byte order of `V` +#define REVERSE_BITS_32(V) \ + (((V & 0x000000FF) << 24) | (((V)&0x0000FF00) << 8) | \ + (((V)&0x00FF0000) >> 8) | (((V)&0xFF000000) >> 24)) + +// Rotates `V` by `N` bits to the left +#define SM3_ROTATE_32(V, N) (((V) << (N)) | ((V) >> (32 - (N)))) + +// The two permutation functions +#define SM3_P0(X) _sm3p0((X)) +#define SM3_P1(X) _sm3p1((X)) + +// Expands the state `s` to `w` +static void sm3_expand(uint32_t w[68], uint32_t s[24]) { + for (int i = 0; i < 16; ++i) { + w[i] = REVERSE_BITS_32(s[i + 8]); + } + + for (int i = 16; i < 68; ++i) { + w[i] = SM3_P1(w[i - 16] ^ w[i - 9] ^ SM3_ROTATE_32(w[i - 3], 15)) ^ + SM3_ROTATE_32(w[i - 13], 7) ^ w[i - 6]; + } +} + +// Compresses `s` in place +static void sm3_compress(uint32_t s[24]) { + uint32_t w[68]; + sm3_expand(w, s); + + // The IV and iteration state + uint32_t x[8]; + memcpy(x, s, 8 * 
sizeof(uint32_t)); + + // The state update transformation below uses and modifies `x` + // depending on the expansion `w` and the current iteration `i` + for (int i = 0; i < 64; ++i) { + // The round constant `t` provides additional randomness + uint32_t t = (i < 16) ? 0x79CC4519 : 0x7A879D8A; + uint32_t rot = SM3_ROTATE_32(x[0], 12); + uint32_t ss1 = SM3_ROTATE_32(rot + x[4] + SM3_ROTATE_32(t, i % 32), 7); + uint32_t ss2 = ss1 ^ rot; + uint32_t w_i = w[i] ^ w[i + 4]; + + uint32_t tt1, tt2; + if (i < 16) { + tt1 = (x[0] ^ x[1] ^ x[2]) + x[3] + ss2 + w_i; + tt2 = (x[4] ^ x[5] ^ x[6]) + x[7] + ss1 + w[i]; + } else { + tt1 = ((x[0] & x[1]) | (x[0] & x[2]) | (x[1] & x[2])) + x[3] + ss2 + w_i; + tt2 = ((x[4] & x[5]) | (~x[4] & x[6])) + x[7] + ss1 + w[i]; + } + + x[3] = x[2]; + x[2] = SM3_ROTATE_32(x[1], 9); + x[1] = x[0]; + x[0] = tt1; + x[7] = x[6]; + x[6] = SM3_ROTATE_32(x[5], 19); + x[5] = x[4]; + x[4] = SM3_P0(tt2); + } + + // Xor `s` with `x` + for (int i = 0; i < 8; ++i) { + s[i] ^= x[i]; + } +} + +// Hashes `message` with `len` bytes with SM3 and stores it to `hash` +void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { + uint32_t s[8 + SM3_BLOCK_SIZE] = { + 0x7380166F, 0x4914B2B9, 0x172442D7, 0xDA8A0600, + 0xA96F30BC, 0x163138AA, 0xE38DEE4D, 0xB0FB0E4E, + }; + uint8_t *b = (uint8_t *)&s[8]; + const uint8_t *m = message; + size_t remaining = len; + + // Hash complete blocks first + while (remaining >= SM3_BLOCK_SIZE) { + memcpy(&s[8], m, SM3_BLOCK_SIZE); + sm3_compress(s); + remaining -= SM3_BLOCK_SIZE; + m += SM3_BLOCK_SIZE; + } + + // Hash the last block with padding + memcpy(b, m, remaining); + // Append bit 1 after the message + b[remaining] = 0b10000000; + ++remaining; + if (remaining > SM3_BLOCK_SIZE - sizeof(uint64_t)) { + sm3_compress(s); + remaining = 0; + } + + // Pad everything between the message and the length with zeros + memset(&b[remaining], 0x00, SM3_BLOCK_SIZE - 8 - remaining); + // Append the length of the message in bits + 
uint64_t bitlen = 8 * (uint64_t)len; + s[22] = REVERSE_BITS_32((uint32_t)(bitlen >> 32)); + s[23] = REVERSE_BITS_32((uint32_t)bitlen); + sm3_compress(s); + + // stores `s` in `hash` in big-endian + for (size_t i = 0; i < 8; ++i) { + hash[i * 4 + 0] = (uint8_t)(s[i] >> 24); + hash[i * 4 + 1] = (uint8_t)(s[i] >> 16); + hash[i * 4 + 2] = (uint8_t)(s[i] >> 8); + hash[i * 4 + 3] = (uint8_t)(s[i] >> 0); + } +} diff --git a/benchmarks/test/Makefile.in b/benchmarks/test/Makefile.in index eace9afd..1a7c8caf 100644 --- a/benchmarks/test/Makefile.in +++ b/benchmarks/test/Makefile.in @@ -5,6 +5,8 @@ $(eval $(call add_test_elf_target,test/test_hash_sha512.c,sha512_reference,sha51 $(eval $(call add_test_elf_target,test/test_hash_sha3.c,sha3_reference,sha3_reference)) +$(eval $(call add_test_elf_target,test/test_hash_sm3.c,sm3_reference,sm3_reference)) + $(eval $(call add_test_elf_target,test/test_block_aes_128.c,aes_reference,aes_128_reference)) $(eval $(call add_test_elf_target,test/test_block_aes_192.c,aes_reference,aes_192_reference)) $(eval $(call add_test_elf_target,test/test_block_aes_256.c,aes_reference,aes_256_reference)) @@ -24,6 +26,8 @@ $(eval $(call add_test_elf_target,test/test_block_sm4.c,sm4_zscrypto,sm4_zscrypt ifeq ($(XLEN),32) +$(eval $(call add_test_elf_target,test/test_hash_sm3.c,sm3_zscrypto_rv32,sm3_zscrypto_rv32)) + $(eval $(call add_test_elf_target,test/test_block_aes_128.c,aes_zscrypto_rv32,aes_128_zscrypto_rv32)) $(eval $(call add_test_elf_target,test/test_block_aes_192.c,aes_zscrypto_rv32,aes_192_zscrypto_rv32)) $(eval $(call add_test_elf_target,test/test_block_aes_256.c,aes_zscrypto_rv32,aes_256_zscrypto_rv32)) @@ -34,6 +38,8 @@ ifeq ($(XLEN),64) $(eval $(call add_test_elf_target,test/test_hash_sha512.c,sha512_zscrypto_rv64,sha512_zscrypto_rv64)) +$(eval $(call add_test_elf_target,test/test_hash_sm3.c,sm3_zscrypto_rv64,sm3_zscrypto_rv64)) + $(eval $(call add_test_elf_target,test/test_block_aes_128.c,aes_zscrypto_rv64,aes_128_zscrypto_rv64)) $(eval 
$(call add_test_elf_target,test/test_block_aes_192.c,aes_zscrypto_rv64,aes_192_zscrypto_rv64)) $(eval $(call add_test_elf_target,test/test_block_aes_256.c,aes_zscrypto_rv64,aes_256_zscrypto_rv64)) From 2b3d78025762f5463dca420874dbf5aa8ddef392 Mon Sep 17 00:00:00 2001 From: Pau Kaifler Date: Thu, 17 Dec 2020 08:40:00 +0100 Subject: [PATCH 2/6] benchmarks: Use less space to store the state --- benchmarks/sm3/reference/sm3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/sm3/reference/sm3.c b/benchmarks/sm3/reference/sm3.c index d949d1ad..54996399 100644 --- a/benchmarks/sm3/reference/sm3.c +++ b/benchmarks/sm3/reference/sm3.c @@ -77,7 +77,7 @@ static void sm3_compress(uint32_t s[24]) { // Hashes `message` with `len` bytes with SM3 and stores it to `hash` void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { - uint32_t s[8 + SM3_BLOCK_SIZE] = { + uint32_t s[24] = { 0x7380166F, 0x4914B2B9, 0x172442D7, 0xDA8A0600, 0xA96F30BC, 0x163138AA, 0xE38DEE4D, 0xB0FB0E4E, }; From 13610ecc04e46cb821d242fd1d74de7bad5da167 Mon Sep 17 00:00:00 2001 From: Pau Kaifler Date: Fri, 18 Dec 2020 09:19:04 +0100 Subject: [PATCH 3/6] benchmarks: use less space to store the state --- benchmarks/sm3/zscrypto_rv32/sm3.c | 2 +- benchmarks/sm3/zscrypto_rv64/sm3.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/sm3/zscrypto_rv32/sm3.c b/benchmarks/sm3/zscrypto_rv32/sm3.c index 3925c320..47006319 100644 --- a/benchmarks/sm3/zscrypto_rv32/sm3.c +++ b/benchmarks/sm3/zscrypto_rv32/sm3.c @@ -78,7 +78,7 @@ static void sm3_compress(uint32_t s[24]) { // Hashes `message` with `len` bytes with SM3 and stores it to `hash` void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { - uint32_t s[8 + SM3_BLOCK_SIZE] = { + uint32_t s[24] = { 0x7380166F, 0x4914B2B9, 0x172442D7, 0xDA8A0600, 0xA96F30BC, 0x163138AA, 0xE38DEE4D, 0xB0FB0E4E, }; diff --git a/benchmarks/sm3/zscrypto_rv64/sm3.c b/benchmarks/sm3/zscrypto_rv64/sm3.c index 
3925c320..47006319 100644 --- a/benchmarks/sm3/zscrypto_rv64/sm3.c +++ b/benchmarks/sm3/zscrypto_rv64/sm3.c @@ -78,7 +78,7 @@ static void sm3_compress(uint32_t s[24]) { // Hashes `message` with `len` bytes with SM3 and stores it to `hash` void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { - uint32_t s[8 + SM3_BLOCK_SIZE] = { + uint32_t s[24] = { 0x7380166F, 0x4914B2B9, 0x172442D7, 0xDA8A0600, 0xA96F30BC, 0x163138AA, 0xE38DEE4D, 0xB0FB0E4E, }; From 53cbf4fcef31d76cdad4b2a5f453605337a56cbd Mon Sep 17 00:00:00 2001 From: Pau Kaifler Date: Mon, 28 Dec 2020 15:04:46 +0100 Subject: [PATCH 4/6] benchmarks: add sm3 test --- benchmarks/test/test_hash_sm3.c | 197 ++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 benchmarks/test/test_hash_sm3.c diff --git a/benchmarks/test/test_hash_sm3.c b/benchmarks/test/test_hash_sm3.c new file mode 100644 index 00000000..6d098323 --- /dev/null +++ b/benchmarks/test/test_hash_sm3.c @@ -0,0 +1,197 @@ + +#include +#include + +#include "riscvcrypto/share/test.h" +#include "riscvcrypto/sm3/api_sm3.h" + +int main(int argc, char **argv) { + + printf("import sys, binascii\n"); + printf("benchmark_name = \"" STR(TEST_NAME) "\"\n"); + + #define TEST_COUNT 5 + size_t message_lengths[TEST_COUNT] = { + 0, + 3, + 60, + 72, + 5978, + }; + uint8_t *messages[TEST_COUNT] = { + (uint8_t *)"", + (uint8_t *)"abc", + (uint8_t *)"LCSqGlpVqORfakSUVcXWDOUjQeUWEeAMqmMEL6F85gM3faBCepnFm7bWTFD4", + (uint8_t *)"HHr8sy98NzzqM4xwmhXa55EMubcSHc8wdFxsMhDCLx5EiknvyA9S9qKu5Q4iW" + "vxEHnOpLTuK", + (uint8_t *)"OBSDlpLxLNM3bgCZiC5vqCabShTXnwTfeJRnIbOiWojmYu9Ves7dFJvGWerCg" + "8IrLuN1nHkuY9xOJhAgaMTU0kkIHi3UlsNLw74EzjmZ3Ce1tbWmxsrljOsU4r" + "eI57kLq55SGvPRgbl6xQwhScZsN7aEGV1f8PVW41aCPr453fKPPnYZSbI2e5A" + "er5jFM0MyOT219BwqzEKalHC6pElwlJE7DFJZEGiUpl9ctbAKQUxuaYUFfEzR" + "aLoI54PsQT8U5GKOTMVgQpv0xErFGgeqOOzZ9hp7EAILv8JjlZZCNbKChiAZd" + "XQnAvf2Uz5wyeJKhg95YXNy6hW1vRZgEIoLd3FMezyixW3AAkUTLpeTiLnDgy" + 
"YeaGkZqDF8GhHt3ZoqeKCRPZ36bgMr9Ny7QrDFDEzrgp2radGUUrYGKpwpr5s" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "OGuufVf6Ml1fRSD3jytNy7aLcHNm2wmf9IbWztOYwfrz56omtmYdTl0dasdas" + "OBSDlpLxLNM3bgCZiC5vqCabShTXnwTfeJRnIbOiWojmYu9Ves7dFJvGWerCg" + "8IrLuN1nHkuY9xOJhAgaMTU0kkIHi3UlsNLw74EzjmZ3Ce1tbWmxsrljOsU4r" + "eI57kLq55SGvPRgbl6xQwhScZsN7aEGV1f8PVW41aCPr453fKPPnYZSbI2e5A" + "er5jFM0MyOT219BwqzEKalHC6pElwlJE7DFJZEGiUpl9ctbAKQUxuaYUFfEzR" + "aLoI54PsQT8U5GKOTMVgQpv0xErFGgeqOOzZ9hp7EAILv8JjlZZCNbKChiAZd" + "XQnAvf2Uz5wyeJKhg95YXNy6hW1vRZgEIoLd3FMezyixW3AAkUTLpeTiLnDgy" + "YeaGkZqDF8GhHt3ZoqeKCRPZ36bgMr9Ny7QrDFDEzrgp2radGUUrYGKpwpr5s" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "OGuufVf6Ml1fRSD3jytNy7aLcHNm2wmf9IbWztOYwfrz56omtmYdTl0dasdas" + "OBSDlpLxLNM3bgCZiC5vqCabShTXnwTfeJRnIbOiWojmYu9Ves7dFJvGWerCg" + "8IrLuN1nHkuY9xOJhAgaMTU0kkIHi3UlsNLw74EzjmZ3Ce1tbWmxsrljOsU4r" + "eI57kLq55SGvPRgbl6xQwhScZsN7aEGV1f8PVW41aCPr453fKPPnYZSbI2e5A" + "er5jFM0MyOT219BwqzEKalHC6pElwlJE7DFJZEGiUpl9ctbAKQUxuaYUFfEzR" + "aLoI54PsQT8U5GKOTMVgQpv0xErFGgeqOOzZ9hp7EAILv8JjlZZCNbKChiAZd" + "XQnAvf2Uz5wyeJKhg95YXNy6hW1vRZgEIoLd3FMezyixW3AAkUTLpeTiLnDgy" + "YeaGkZqDF8GhHt3ZoqeKCRPZ36bgMr9Ny7QrDFDEzrgp2radGUUrYGKpwpr5s" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "OGuufVf6Ml1fRSD3jytNy7aLcHNm2wmf9IbWztOYwfrz56omtmYdTl0dasdas" + "OBSDlpLxLNM3bgCZiC5vqCabShTXnwTfeJRnIbOiWojmYu9Ves7dFJvGWerCg" + "8IrLuN1nHkuY9xOJhAgaMTU0kkIHi3UlsNLw74EzjmZ3Ce1tbWmxsrljOsU4r" + "eI57kLq55SGvPRgbl6xQwhScZsN7aEGV1f8PVW41aCPr453fKPPnYZSbI2e5A" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "OGuufVf6Ml1fRSD3jytNy7aLcHNm2wmf9IbWztOYwfrz56omtmYdTl0dasdas" + "OBSDlpLxLNM3bgCZiC5vqCabShTXnwTfeJRnIbOiWojmYu9Ves7dFJvGWerCg" + 
"8IrLuN1nHkuY9xOJhAgaMTU0kkIHi3UlsNLw74EzjmZ3Ce1tbWmxsrljOsU4r" + "eI57kLq55SGvPRgbl6xQwhScZsN7aEGV1f8PVW41aCPr453fKPPnYZSbI2e5A" + "er5jFM0MyOT219BwqzEKalHC6pElwlJE7DFJZEGiUpl9ctbAKQUxuaYUFfEzR" + "aLoI54PsQT8U5GKOTMVgQpv0xErFGgeqOOzZ9hp7EAILv8JjlZZCNbKChiAZd" + "XQnAvf2Uz5wyeJKhg95YXNy6hW1vRZgEIoLd3FMezyixW3AAkUTLpeTiLnDgy" + "YeaGkZqDF8GhHt3ZoqeKCRPZ36bgMr9Ny7QrDFDEzrgp2radGUUrYGKpwpr5s" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "OGuufVf6Ml1fRSD3jytNy7aLcHNm2wmf9IbWztOYwfrz56omtmYdTl0dasdas" + "OBSDlpLxLNM3bgCZiC5vqCabShTXnwTfeJRnIbOiWojmYu9Ves7dFJvGWerCg" + "8IrLuN1nHkuY9xOJhAgaMTU0kkIHi3UlsNLw74EzjmZ3Ce1tbWmxsrljOsU4r" + "eI57kLq55SGvPRgbl6xQwhScZsN7aEGV1f8PVW41aCPr453fKPPnYZSbI2e5A" + "er5jFM0MyOT219BwqzEKalHC6pElwlJE7DFJZEGiUpl9ctbAKQUxuaYUFfEzR" + "aLoI54PsQT8U5GKOTMVgQpv0xErFGgeqOOzZ9hp7EAILv8JjlZZCNbKChiAZd" + "XQnAvf2Uz5wyeJKhg95YXNy6hW1vRZgEIoLd3FMezyixW3AAkUTLpeTiLnDgy" + "YeaGkZqDF8GhHt3ZoqeKCRPZ36bgMr9Ny7QrDFDEzrgp2radGUUrYGKpwpr5s" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "er5jFM0MyOT219BwqzEKalHC6pElwlJE7DFJZEGiUpl9ctbAKQUxuaYUFfEzR" + "aLoI54PsQT8U5GKOTMVgQpv0xErFGgeqOOzZ9hp7EAILv8JjlZZCNbKChiAZd" + "XQnAvf2Uz5wyeJKhg95YXNy6hW1vRZgEIoLd3FMezyixW3AAkUTLpeTiLnDgy" + "YeaGkZqDF8GhHt3ZoqeKCRPZ36bgMr9Ny7QrDFDEzrgp2radGUUrYGKpwpr5s" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "OGuufVf6Ml1fRSD3jytNy7aLcHNm2wmf9IbWztOYwfrz56omtmYdTl0dasdas" + "OBSDlpLxLNM3bgCZiC5vqCabShTXnwTfeJRnIbOiWojmYu9Ves7dFJvGWerCg" + "8IrLuN1nHkuY9xOJhAgaMTU0kkIHi3UlsNLw74EzjmZ3Ce1tbWmxsrljOsU4r" + "eI57kLq55SGvPRgbl6xQwhScZsN7aEGV1f8PVW41aCPr453fKPPnYZSbI2e5A" + "er5jFM0MyOT219BwqzEKalHC6pElwlJE7DFJZEGiUpl9ctbAKQUxuaYUFfEzR" + "aLoI54PsQT8U5GKOTMVgQpv0xErFGgeqOOzZ9hp7EAILv8JjlZZCNbKChiAZd" + "XQnAvf2Uz5wyeJKhg95YXNy6hW1vRZgEIoLd3FMezyixW3AAkUTLpeTiLnDgy" + 
"YeaGkZqDF8GhHt3ZoqeKCRPZ36bgMr9Ny7QrDFDEzrgp2radGUUrYGKpwpr5s" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "OGuufVf6Ml1fRSD3jytNy7aLcHNm2wmf9IbWztOYwfrz56omtmYdTl0dasdas" + "OBSDlpLxLNM3bgCZiC5vqCabShTXnwTfeJRnIbOiWojmYu9Ves7dFJvGWerCg" + "8IrLuN1nHkuY9xOJhAgaMTU0kkIHi3UlsNLw74EzjmZ3Ce1tbWmxsrljOsU4r" + "eI57kLq55SGvPRgbl6xQwhScZsN7aEGV1f8PVW41aCPr453fKPPnYZSbI2e5A" + "er5jFM0MyOT219BwqzEKalHC6pElwlJE7DFJZEGiUpl9ctbAKQUxuaYUFfEzR" + "aLoI54PsQT8U5GKOTMVgQpv0xErFGgeqOOzZ9hp7EAILv8JjlZZCNbKChiAZd" + "XQnAvf2Uz5wyeJKhg95YXNy6hW1vRZgEIoLd3FMezyixW3AAkUTLpeTiLnDgy" + "YeaGkZqDF8GhHt3ZoqeKCRPZ36bgMr9Ny7QrDFDEzrgp2radGUUrYGKpwpr5s" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "OGuufVf6Ml1fRSD3jytNy7aLcHNm2wmf9IbWztOYwfrz56omtmYdTl0dasdas" + "OBSDlpLxLNM3bgCZiC5vqCabShTXnwTfeJRnIbOiWojmYu9Ves7dFJvGWerCg" + "8IrLuN1nHkuY9xOJhAgaMTU0kkIHi3UlsNLw74EzjmZ3Ce1tbWmxsrljOsU4r" + "eI57kLq55SGvPRgbl6xQwhScZsN7aEGV1f8PVW41aCPr453fKPPnYZSbI2e5A" + "er5jFM0MyOT219BwqzEKalHC6pElwlJE7DFJZEGiUpl9ctbAKQUxuaYUFfEzR" + "aLoI54PsQT8U5GKOTMVgQpv0xErFGgeqOOzZ9hp7EAILv8JjlZZCNbKChiAZd" + "XQnAvf2Uz5wyeJKhg95YXNy6hW1vRZgEIoLd3FMezyixW3AAkUTLpeTiLnDgy" + "YeaGkZqDF8GhHt3ZoqeKCRPZ36bgMr9Ny7QrDFDEzrgp2radGUUrYGKpwpr5s" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "OGuufVf6Ml1fRSD3jytNy7aLcHNm2wmf9IbWztOYwfrz56omtmYdTl0dasdas" + "OBSDlpLxLNM3bgCZiC5vqCabShTXnwTfeJRnIbOiWojmYu9Ves7dFJvGWerCg" + "8IrLuN1nHkuY9xOJhAgaMTU0kkIHi3UlsNLw74EzjmZ3Ce1tbWmxsrljOsU4r" + "eI57kLq55SGvPRgbl6xQwhScZsN7aEGV1f8PVW41aCPr453fKPPnYZSbI2e5A" + "er5jFM0MyOT219BwqzEKalHC6pElwlJE7DFJZEGiUpl9ctbAKQUxuaYUFfEzR" + "aLoI54PsQT8U5GKOTMVgQpv0xErFGgeqOOzZ9hp7EAILv8JjlZZCNbKChiAZd" + "XQnAvf2Uz5wyeJKhg95YXNy6hW1vRZgEIoLd3FMezyixW3AAkUTLpeTiLnDgy" + "YeaGkZqDF8GhHt3ZoqeKCRPZ36bgMr9Ny7QrDFDEzrgp2radGUUrYGKpwpr5s" + 
"moAqGIMHGckuaMNu1VCjhwZpPUNdt8ETxmIbODsDxgTGNL5P1NbfE6VvsFBWT" + "OGuufVf6Ml1fRSD3jytNy7aLcHNm2wmf9IbWztOYwfrz56omtmYdTl0dsadas", + }; + uint8_t expected_digests[TEST_COUNT][32] = { + {0x1A, 0xB2, 0x1D, 0x83, 0x55, 0xCF, 0xA1, 0x7F, 0x8E, 0x61, 0x19, + 0x48, 0x31, 0xE8, 0x1A, 0x8F, 0x22, 0xBE, 0xC8, 0xC7, 0x28, 0xFE, + 0xFB, 0x74, 0x7E, 0xD0, 0x35, 0xEB, 0x50, 0x82, 0xAA, 0x2B}, + {0x66, 0xC7, 0xF0, 0xF4, 0x62, 0xEE, 0xED, 0xD9, 0xD1, 0xF2, 0xD4, + 0x6B, 0xDC, 0x10, 0xE4, 0xE2, 0x41, 0x67, 0xC4, 0x87, 0x5C, 0xF2, + 0xF7, 0xA2, 0x29, 0x7D, 0xA0, 0x2B, 0x8F, 0x4B, 0xA8, 0xE0}, + {0x44, 0x9E, 0x07, 0xB6, 0xA7, 0xCE, 0xAF, 0x7F, 0x1F, 0xAD, 0xD9, + 0x27, 0xAC, 0xF8, 0xA9, 0x50, 0x53, 0x9E, 0x29, 0x24, 0x73, 0xD4, + 0x6C, 0xFC, 0xD0, 0x04, 0xB9, 0xCD, 0xB6, 0x16, 0x6D, 0x64}, + {0x92, 0xA2, 0x58, 0x45, 0x27, 0x64, 0x32, 0x98, 0xF8, 0xE6, 0x65, + 0xCE, 0xE4, 0x25, 0x4C, 0xAF, 0x1D, 0xC0, 0xA4, 0xAF, 0xFA, 0x23, + 0x69, 0xED, 0x9F, 0xBA, 0x6E, 0xDF, 0x63, 0x69, 0xCE, 0x9B}, + {0xd9, 0x20, 0xd8, 0x2c, 0xc4, 0x3a, 0xb3, 0x85, 0x2f, 0x0d, 0x8b, 0x21, 0xaa, 0xc7, 0xde, 0xee, 0x78, 0xd6, 0xdd, 0x70, 0xd3, 0x04, 0x39, 0xdb, 0x47, 0xdc, 0x7b, 0x59, 0xb8, 0x5c, 0x34, 0x65}, + }; + + for (int i = 0; i < TEST_COUNT; i++) { + + const uint64_t start_instrs = test_rdinstret(); + + uint8_t actual_digest[32]; + sm3_hash(actual_digest, messages[i], message_lengths[i]); + + const uint64_t end_instrs = test_rdinstret(); + + const uint64_t final_instrs = end_instrs - start_instrs; + + printf("#\n# test %d/%d\n", i, TEST_COUNT); + + printf("input_len = %lu\n", message_lengths[i]); + + printf("input_data = "); + puthex_py(messages[i], message_lengths[i]); + printf("\n"); + + printf("actual_digest = "); + puthex_py(actual_digest, 32); + printf("\n"); + + printf("instr_count = 0x"); + puthex64(final_instrs); + printf("\n"); + + printf("testnum = %d\n", i); + printf("ipb = 0 if input_len == 0 else instr_count / " + "input_len\n"); + + printf("expected_digest = "); + 
puthex_py(expected_digests[i], 32); + printf("\n"); + + printf("if( expected_digest != expected_digest ):\n"); + printf(" print(\"Test %d failed.\")\n", i); + printf( + " print( 'input == %%s' %% ( binascii.b2a_hex( input_data ) ) )" + "\n"); + printf(" print( 'actual_digest == %%s' %% ( binascii.b2a_hex( " + "actual_digest ) ) )" + "\n"); + printf(" print( ' != %%s' %% ( binascii.b2a_hex( " + "expected_digest ) ) )" + "\n"); + printf(" sys.exit(1)\n"); + printf("else:\n"); + printf( + " print(\"" STR(TEST_NAME) " Test %%d passed. " + "%%d instrs / %%d bytes. IPB=%%f\" %% " + "(testnum,instr_count,input_len,ipb))\n"); + } + + return 0; +} From 74c2612862cbc5446dfe86a37864bb8120435c2b Mon Sep 17 00:00:00 2001 From: Pau Kaifler Date: Fri, 12 Feb 2021 11:46:00 +0100 Subject: [PATCH 5/6] benchmarks: optimize sm3 This commit optimizes the RV32 SM3 implementation, yielding a speedup of about 2.6x over the original implementation. The RV64 version is faster now too, but the toolchain seems to be broken: grev and rol do not seem to compile, so it is still a lot slower.
--- benchmarks/sm3/reference/sm3.c | 123 +++++++++++++++++---------- benchmarks/sm3/zscrypto_rv32/sm3.c | 130 ++++++++++++++++++----------- benchmarks/sm3/zscrypto_rv64/sm3.c | 125 +++++++++++++++++---------- benchmarks/test/test_hash_sm3.c | 8 +- 4 files changed, 243 insertions(+), 143 deletions(-) diff --git a/benchmarks/sm3/reference/sm3.c b/benchmarks/sm3/reference/sm3.c index 54996399..ad9af8d3 100644 --- a/benchmarks/sm3/reference/sm3.c +++ b/benchmarks/sm3/reference/sm3.c @@ -8,7 +8,7 @@ #define SM3_BLOCK_SIZE (16 * sizeof(uint32_t)) // Reverses the byte order of `V` -#define REVERSE_BITS_32(V) \ +#define REVERSE_BYTES_32(V) \ (((V & 0x000000FF) << 24) | (((V)&0x0000FF00) << 8) | \ (((V)&0x00FF0000) >> 8) | (((V)&0xFF000000) >> 24)) @@ -19,54 +19,83 @@ #define SM3_P0(X) ((X) ^ SM3_ROTATE_32((X), 9) ^ SM3_ROTATE_32((X), 17)) #define SM3_P1(X) ((X) ^ SM3_ROTATE_32((X), 15) ^ SM3_ROTATE_32((X), 23)) -// Expands the state `s` to `w` -static void sm3_expand(uint32_t w[68], uint32_t s[24]) { - for (int i = 0; i < 16; ++i) { - w[i] = REVERSE_BITS_32(s[i + 8]); +// Expands state values and returns the result +#define SM3_EXPAND_STEP(W0, W3, W7, W10, W13) \ + (SM3_P1((W0) ^ (W7) ^ SM3_ROTATE_32((W13), 15)) ^ SM3_ROTATE_32((W3), 7) ^ \ + (W10)) + +// Performs a compression step with permutation constant T, iteration I +// and expanded words W1 and W2 +#define SM3_COMPRESS_STEP(I, W1, W2) \ + { \ + uint32_t t = (I) < 16 ? 
0x79CC4519 : 0x7A879D8A; \ + uint32_t rot = SM3_ROTATE_32(x[0], 12); \ + uint32_t ss1 = SM3_ROTATE_32(rot + x[4] + SM3_ROTATE_32(t, (I)), 7); \ + \ + uint32_t tt1, tt2; \ + /* optimized out by the compiler */ \ + if ((I) < 16) { \ + tt1 = (x[0] ^ x[1] ^ x[2]) + x[3] + (ss1 ^ rot) + ((W1) ^ (W2)); \ + tt2 = (x[4] ^ x[5] ^ x[6]) + x[7] + ss1 + (W1); \ + } else { \ + tt1 = ((x[0] & x[1]) | (x[0] & x[2]) | (x[1] & x[2])) + x[3] + \ + (ss1 ^ rot) + ((W1) ^ (W2)); \ + tt2 = ((x[4] & x[5]) | (~x[4] & x[6])) + x[7] + ss1 + (W1); \ + } \ + \ + x[3] = x[2]; \ + x[2] = SM3_ROTATE_32(x[1], 9); \ + x[1] = x[0]; \ + x[0] = tt1; \ + x[7] = x[6]; \ + x[6] = SM3_ROTATE_32(x[5], 19); \ + x[5] = x[4]; \ + x[4] = SM3_P0(tt2); \ } - for (int i = 16; i < 68; ++i) { - w[i] = SM3_P1(w[i - 16] ^ w[i - 9] ^ SM3_ROTATE_32(w[i - 3], 15)) ^ - SM3_ROTATE_32(w[i - 13], 7) ^ w[i - 6]; - } -} - // Compresses `s` in place static void sm3_compress(uint32_t s[24]) { - uint32_t w[68]; - sm3_expand(w, s); - // The IV and iteration state uint32_t x[8]; - memcpy(x, s, 8 * sizeof(uint32_t)); - - // The state update transformation below uses and modifies `x` - // depending on the expansion `w` and the current iteration `i` - for (int i = 0; i < 64; ++i) { - // The round constant `t` provides additional randomness - uint32_t t = (i < 16) ? 0x79CC4519 : 0x7A879D8A; - uint32_t rot = SM3_ROTATE_32(x[0], 12); - uint32_t ss1 = SM3_ROTATE_32(rot + x[4] + SM3_ROTATE_32(t, i % 32), 7); - uint32_t ss2 = ss1 ^ rot; - uint32_t w_i = w[i] ^ w[i + 4]; - - uint32_t tt1, tt2; - if (i < 16) { - tt1 = (x[0] ^ x[1] ^ x[2]) + x[3] + ss2 + w_i; - tt2 = (x[4] ^ x[5] ^ x[6]) + x[7] + ss1 + w[i]; - } else { - tt1 = ((x[0] & x[1]) | (x[0] & x[2]) | (x[1] & x[2])) + x[3] + ss2 + w_i; - tt2 = ((x[4] & x[5]) | (~x[4] & x[6])) + x[7] + ss1 + w[i]; + for (int i = 0; i < 8; ++i) { + x[i] = s[i]; + } + + // `w` contains 16 of the expanded words. 
+ uint32_t w[16]; + for (int i = 0; i < 16; ++i) { + w[i] = REVERSE_BYTES_32(s[i + 8]); + } + + // Compress first 12 words. + for (int i = 0; i < 12; ++i) { + SM3_COMPRESS_STEP(i, w[i], w[i + 4]); + } + // Expand and compress the remaining 4 words. + for (int i = 0; i < 4; ++i) { + w[i] = + SM3_EXPAND_STEP(w[i], w[3 + i], w[7 + i], w[10 + i], w[(13 + i) % 16]); + SM3_COMPRESS_STEP(i + 12, w[i + 12], w[i]); + } + + // Rounds 16 to 63 + for (int j = 16; j < 64; j += 16) { + // Expand and then compress the first 12 words as the remaining 4 need to be + // handled differently in this implementation. + for (int i = 0; i < 12; ++i) { + w[4 + i] = SM3_EXPAND_STEP(w[4 + i], w[(7 + i) % 16], w[(11 + i) % 16], + w[(14 + i) % 16], w[(1 + i) % 16]); + } + for (int i = 0; i < 12; ++i) { + SM3_COMPRESS_STEP(i + j, w[i], w[i + 4]); } - x[3] = x[2]; - x[2] = SM3_ROTATE_32(x[1], 9); - x[1] = x[0]; - x[0] = tt1; - x[7] = x[6]; - x[6] = SM3_ROTATE_32(x[5], 19); - x[5] = x[4]; - x[4] = SM3_P0(tt2); + // Now expand and compress the remaining 4 words. 
+ for (int i = 0; i < 4; ++i) { + w[i] = SM3_EXPAND_STEP(w[i], w[3 + i], w[7 + i], w[10 + i], + w[(13 + i) % 16]); + SM3_COMPRESS_STEP(i + j + 12, w[i + 12], w[i]); + } } // Xor `s` with `x` @@ -87,14 +116,18 @@ void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { // Hash complete blocks first while (remaining >= SM3_BLOCK_SIZE) { - memcpy(&s[8], m, SM3_BLOCK_SIZE); + for (int i = 0; i < SM3_BLOCK_SIZE; ++i) { + b[i] = m[i]; + } sm3_compress(s); remaining -= SM3_BLOCK_SIZE; m += SM3_BLOCK_SIZE; } // Hash the last block with padding - memcpy(b, m, remaining); + for (int i = 0; i < remaining; ++i) { + b[i] = m[i]; + } // Append bit 1 after the message b[remaining] = 0b10000000; ++remaining; @@ -107,8 +140,8 @@ void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { memset(&b[remaining], 0x00, SM3_BLOCK_SIZE - 8 - remaining); // Append the length of the message in bits uint64_t bitlen = 8 * (uint64_t)len; - s[22] = REVERSE_BITS_32((uint32_t)(bitlen >> 32)); - s[23] = REVERSE_BITS_32((uint32_t)bitlen); + s[22] = REVERSE_BYTES_32((uint32_t)(bitlen >> 32)); + s[23] = REVERSE_BYTES_32((uint32_t)bitlen); sm3_compress(s); // stores `s` in `hash` in big-endian diff --git a/benchmarks/sm3/zscrypto_rv32/sm3.c b/benchmarks/sm3/zscrypto_rv32/sm3.c index 47006319..e6bf1b99 100644 --- a/benchmarks/sm3/zscrypto_rv32/sm3.c +++ b/benchmarks/sm3/zscrypto_rv32/sm3.c @@ -2,72 +2,100 @@ #include #include -#include "riscvcrypto/sm3/api_sm3.h" #include "riscvcrypto/share/riscv-crypto-intrinsics.h" +#include "riscvcrypto/sm3/api_sm3.h" +#include "rvintrin.h" // The block size in bytes #define SM3_BLOCK_SIZE (16 * sizeof(uint32_t)) // Reverses the byte order of `V` -#define REVERSE_BITS_32(V) \ - (((V & 0x000000FF) << 24) | (((V)&0x0000FF00) << 8) | \ - (((V)&0x00FF0000) >> 8) | (((V)&0xFF000000) >> 24)) +#define REVERSE_BYTES_32(V) (_rv32_grev((V), 0x18)) // Rotates `V` by `N` bits to the left -#define SM3_ROTATE_32(V, N) (((V) << (N)) | ((V) >> (32 - (N)))) 
+#define SM3_ROTATE_32(V, N) (_rv32_rol((V), (N))) // The two permutation functions #define SM3_P0(X) _sm3p0((X)) #define SM3_P1(X) _sm3p1((X)) -// Expands the state `s` to `w` -static void sm3_expand(uint32_t w[68], uint32_t s[24]) { - for (int i = 0; i < 16; ++i) { - w[i] = REVERSE_BITS_32(s[i + 8]); - } - - for (int i = 16; i < 68; ++i) { - w[i] = SM3_P1(w[i - 16] ^ w[i - 9] ^ SM3_ROTATE_32(w[i - 3], 15)) ^ - SM3_ROTATE_32(w[i - 13], 7) ^ w[i - 6]; +// Expands state values and returns the result +#define SM3_EXPAND_STEP(W0, W3, W7, W10, W13) \ + (SM3_P1((W0) ^ (W7) ^ SM3_ROTATE_32((W13), 15)) ^ SM3_ROTATE_32((W3), 7) ^ \ + (W10)) + +// Performs a compression step on the enclosing `x` for iteration I with +// expanded words W1 and W2; the round constant `t` is derived from I +#define SM3_COMPRESS_STEP(I, W1, W2) \ + { \ + uint32_t t = (I) < 16 ? 0x79CC4519 : 0x7A879D8A; \ + uint32_t rot = SM3_ROTATE_32(x[0], 12); \ + uint32_t ss1 = SM3_ROTATE_32(rot + x[4] + SM3_ROTATE_32(t, (I)), 7); \ + \ + uint32_t tt1, tt2; \ + /* optimized out by the compiler */ \ + if ((I) < 16) { \ + tt1 = (x[0] ^ x[1] ^ x[2]) + x[3] + (ss1 ^ rot) + ((W1) ^ (W2)); \ + tt2 = (x[4] ^ x[5] ^ x[6]) + x[7] + ss1 + (W1); \ + } else { \ + tt1 = ((x[0] & x[1]) | (x[0] & x[2]) | (x[1] & x[2])) + x[3] + \ + (ss1 ^ rot) + ((W1) ^ (W2)); \ + tt2 = ((x[4] & x[5]) | (~x[4] & x[6])) + x[7] + ss1 + (W1); \ + } \ + \ + x[3] = x[2]; \ + x[2] = SM3_ROTATE_32(x[1], 9); \ + x[1] = x[0]; \ + x[0] = tt1; \ + x[7] = x[6]; \ + x[6] = SM3_ROTATE_32(x[5], 19); \ + x[5] = x[4]; \ + x[4] = SM3_P0(tt2); \ } -} // Compresses `s` in place static void sm3_compress(uint32_t s[24]) { - uint32_t w[68]; - sm3_expand(w, s); - // The IV and iteration state uint32_t x[8]; - memcpy(x, s, 8 * sizeof(uint32_t)); - - // The state update transformation below uses and modifies `x` - // depending on the expansion `w` and the current iteration `i` - for (int i = 0; i < 64; ++i) { - // The round constant `t` provides additional 
randomness - uint32_t t = (i < 16) ? 
0x79CC4519 : 0x7A879D8A; - uint32_t rot = SM3_ROTATE_32(x[0], 12); - uint32_t ss1 = SM3_ROTATE_32(rot + x[4] + SM3_ROTATE_32(t, i % 32), 7); - uint32_t ss2 = ss1 ^ rot; - uint32_t w_i = w[i] ^ w[i + 4]; - - uint32_t tt1, tt2; - if (i < 16) { - tt1 = (x[0] ^ x[1] ^ x[2]) + x[3] + ss2 + w_i; - tt2 = (x[4] ^ x[5] ^ x[6]) + x[7] + ss1 + w[i]; - } else { - tt1 = ((x[0] & x[1]) | (x[0] & x[2]) | (x[1] & x[2])) + x[3] + ss2 + w_i; - tt2 = ((x[4] & x[5]) | (~x[4] & x[6])) + x[7] + ss1 + w[i]; + for (int i = 0; i < 8; ++i) { + x[i] = s[i]; + } + + // `w` contains 16 of the expanded words. + uint32_t w[16]; + for (int i = 0; i < 16; ++i) { + w[i] = REVERSE_BYTES_32(s[i + 8]); + } + + // Compress first 12 words. + for (int i = 0; i < 12; ++i) { + SM3_COMPRESS_STEP(i, w[i], w[i + 4]); + } + // Expand and compress the remaining 4 words. + for (int i = 0; i < 4; ++i) { + w[i] = + SM3_EXPAND_STEP(w[i], w[3 + i], w[7 + i], w[10 + i], w[(13 + i) % 16]); + SM3_COMPRESS_STEP(i + 12, w[i + 12], w[i]); + } + + // Rounds 16 to 63 + for (int j = 16; j < 64; j += 16) { + // Expand and then compress the first 12 words as the remaining 4 need to be + // handled differently in this implementation. + for (int i = 0; i < 12; ++i) { + w[4 + i] = SM3_EXPAND_STEP(w[4 + i], w[(7 + i) % 16], w[(11 + i) % 16], + w[(14 + i) % 16], w[(1 + i) % 16]); + } + for (int i = 0; i < 12; ++i) { + SM3_COMPRESS_STEP(i + j, w[i], w[i + 4]); } - x[3] = x[2]; - x[2] = SM3_ROTATE_32(x[1], 9); - x[1] = x[0]; - x[0] = tt1; - x[7] = x[6]; - x[6] = SM3_ROTATE_32(x[5], 19); - x[5] = x[4]; - x[4] = SM3_P0(tt2); + // Now expand and compress the remaining 4 words. 
+ for (int i = 0; i < 4; ++i) { + w[i] = SM3_EXPAND_STEP(w[i], w[3 + i], w[7 + i], w[10 + i], + w[(13 + i) % 16]); + SM3_COMPRESS_STEP(i + j + 12, w[i + 12], w[i]); + } } // Xor `s` with `x` @@ -88,14 +116,18 @@ void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { // Hash complete blocks first while (remaining >= SM3_BLOCK_SIZE) { - memcpy(&s[8], m, SM3_BLOCK_SIZE); + for (int i = 0; i < SM3_BLOCK_SIZE; ++i) { + b[i] = m[i]; + } sm3_compress(s); remaining -= SM3_BLOCK_SIZE; m += SM3_BLOCK_SIZE; } // Hash the last block with padding - memcpy(b, m, remaining); + for (int i = 0; i < remaining; ++i) { + b[i] = m[i]; + } // Append bit 1 after the message b[remaining] = 0b10000000; ++remaining; @@ -108,8 +140,8 @@ void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { memset(&b[remaining], 0x00, SM3_BLOCK_SIZE - 8 - remaining); // Append the length of the message in bits uint64_t bitlen = 8 * (uint64_t)len; - s[22] = REVERSE_BITS_32((uint32_t)(bitlen >> 32)); - s[23] = REVERSE_BITS_32((uint32_t)bitlen); + s[22] = REVERSE_BYTES_32((uint32_t)(bitlen >> 32)); + s[23] = REVERSE_BYTES_32((uint32_t)bitlen); sm3_compress(s); // stores `s` in `hash` in big-endian diff --git a/benchmarks/sm3/zscrypto_rv64/sm3.c b/benchmarks/sm3/zscrypto_rv64/sm3.c index 47006319..a236c5d5 100644 --- a/benchmarks/sm3/zscrypto_rv64/sm3.c +++ b/benchmarks/sm3/zscrypto_rv64/sm3.c @@ -2,14 +2,14 @@ #include #include -#include "riscvcrypto/sm3/api_sm3.h" #include "riscvcrypto/share/riscv-crypto-intrinsics.h" +#include "riscvcrypto/sm3/api_sm3.h" // The block size in bytes #define SM3_BLOCK_SIZE (16 * sizeof(uint32_t)) // Reverses the byte order of `V` -#define REVERSE_BITS_32(V) \ +#define REVERSE_BYTES_32(V) \ (((V & 0x000000FF) << 24) | (((V)&0x0000FF00) << 8) | \ (((V)&0x00FF0000) >> 8) | (((V)&0xFF000000) >> 24)) @@ -20,54 +20,83 @@ #define SM3_P0(X) _sm3p0((X)) #define SM3_P1(X) _sm3p1((X)) -// Expands the state `s` to `w` -static void sm3_expand(uint32_t w[68], 
uint32_t s[24]) { - for (int i = 0; i < 16; ++i) { - w[i] = REVERSE_BITS_32(s[i + 8]); - } - - for (int i = 16; i < 68; ++i) { - w[i] = SM3_P1(w[i - 16] ^ w[i - 9] ^ SM3_ROTATE_32(w[i - 3], 15)) ^ - SM3_ROTATE_32(w[i - 13], 7) ^ w[i - 6]; +// Expands state values and returns the result +#define SM3_EXPAND_STEP(W0, W3, W7, W10, W13) \ + (SM3_P1((W0) ^ (W7) ^ SM3_ROTATE_32((W13), 15)) ^ SM3_ROTATE_32((W3), 7) ^ \ + (W10)) + +// Performs a compression step on the enclosing `x` for iteration I with +// expanded words W1 and W2; the round constant `t` is derived from I +#define SM3_COMPRESS_STEP(I, W1, W2) \ + { \ + uint32_t t = (I) < 16 ? 0x79CC4519 : 0x7A879D8A; \ + uint32_t rot = SM3_ROTATE_32(x[0], 12); \ + uint32_t ss1 = SM3_ROTATE_32(rot + x[4] + SM3_ROTATE_32(t, (I)), 7); \ + \ + uint32_t tt1, tt2; \ + /* optimized out by the compiler */ \ + if ((I) < 16) { \ + tt1 = (x[0] ^ x[1] ^ x[2]) + x[3] + (ss1 ^ rot) + ((W1) ^ (W2)); \ + tt2 = (x[4] ^ x[5] ^ x[6]) + x[7] + ss1 + (W1); \ + } else { \ + tt1 = ((x[0] & x[1]) | (x[0] & x[2]) | (x[1] & x[2])) + x[3] + \ + (ss1 ^ rot) + ((W1) ^ (W2)); \ + tt2 = ((x[4] & x[5]) | (~x[4] & x[6])) + x[7] + ss1 + (W1); \ + } \ + \ + x[3] = x[2]; \ + x[2] = SM3_ROTATE_32(x[1], 9); \ + x[1] = x[0]; \ + x[0] = tt1; \ + x[7] = x[6]; \ + x[6] = SM3_ROTATE_32(x[5], 19); \ + x[5] = x[4]; \ + x[4] = SM3_P0(tt2); \ } -} // Compresses `s` in place static void sm3_compress(uint32_t s[24]) { - uint32_t w[68]; - sm3_expand(w, s); - // The IV and iteration state uint32_t x[8]; - memcpy(x, s, 8 * sizeof(uint32_t)); - - // The state update transformation below uses and modifies `x` - // depending on the expansion `w` and the current iteration `i` - for (int i = 0; i < 64; ++i) { - // The round constant `t` provides additional randomness - uint32_t t = (i < 16) ? 
0x79CC4519 : 0x7A879D8A; - uint32_t rot = SM3_ROTATE_32(x[0], 12); - uint32_t ss1 = SM3_ROTATE_32(rot + x[4] + SM3_ROTATE_32(t, i % 32), 7); - uint32_t ss2 = ss1 ^ rot; - uint32_t w_i = w[i] ^ w[i + 4]; - - uint32_t tt1, tt2; - if (i < 16) { - tt1 = (x[0] ^ x[1] ^ x[2]) + x[3] + ss2 + w_i; - tt2 = (x[4] ^ x[5] ^ x[6]) + x[7] + ss1 + w[i]; - } else { - tt1 = ((x[0] & x[1]) | (x[0] & x[2]) | (x[1] & x[2])) + x[3] + ss2 + w_i; - tt2 = ((x[4] & x[5]) | (~x[4] & x[6])) + x[7] + ss1 + w[i]; + for (int i = 0; i < 8; ++i) { + x[i] = s[i]; + } + + // `w` contains 16 of the expanded words. + uint32_t w[16]; + for (int i = 0; i < 16; ++i) { + w[i] = REVERSE_BYTES_32(s[i + 8]); + } + + // Compress first 12 words. + for (int i = 0; i < 12; ++i) { + SM3_COMPRESS_STEP(i, w[i], w[i + 4]); + } + // Expand and compress the remaining 4 words. + for (int i = 0; i < 4; ++i) { + w[i] = + SM3_EXPAND_STEP(w[i], w[3 + i], w[7 + i], w[10 + i], w[(13 + i) % 16]); + SM3_COMPRESS_STEP(i + 12, w[i + 12], w[i]); + } + + // Rounds 16 to 63 + for (int j = 16; j < 64; j += 16) { + // Expand and then compress the first 12 words as the remaining 4 need to be + // handled differently in this implementation. + for (int i = 0; i < 12; ++i) { + w[4 + i] = SM3_EXPAND_STEP(w[4 + i], w[(7 + i) % 16], w[(11 + i) % 16], + w[(14 + i) % 16], w[(1 + i) % 16]); + } + for (int i = 0; i < 12; ++i) { + SM3_COMPRESS_STEP(i + j, w[i], w[i + 4]); } - x[3] = x[2]; - x[2] = SM3_ROTATE_32(x[1], 9); - x[1] = x[0]; - x[0] = tt1; - x[7] = x[6]; - x[6] = SM3_ROTATE_32(x[5], 19); - x[5] = x[4]; - x[4] = SM3_P0(tt2); + // Now expand and compress the remaining 4 words. 
+ for (int i = 0; i < 4; ++i) { + w[i] = SM3_EXPAND_STEP(w[i], w[3 + i], w[7 + i], w[10 + i], + w[(13 + i) % 16]); + SM3_COMPRESS_STEP(i + j + 12, w[i + 12], w[i]); + } } // Xor `s` with `x` @@ -88,14 +117,18 @@ void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { // Hash complete blocks first while (remaining >= SM3_BLOCK_SIZE) { - memcpy(&s[8], m, SM3_BLOCK_SIZE); + for (int i = 0; i < SM3_BLOCK_SIZE; ++i) { + b[i] = m[i]; + } sm3_compress(s); remaining -= SM3_BLOCK_SIZE; m += SM3_BLOCK_SIZE; } // Hash the last block with padding - memcpy(b, m, remaining); + for (int i = 0; i < remaining; ++i) { + b[i] = m[i]; + } // Append bit 1 after the message b[remaining] = 0b10000000; ++remaining; @@ -108,8 +141,8 @@ void sm3_hash(uint8_t hash[32], const uint8_t *message, size_t len) { memset(&b[remaining], 0x00, SM3_BLOCK_SIZE - 8 - remaining); // Append the length of the message in bits uint64_t bitlen = 8 * (uint64_t)len; - s[22] = REVERSE_BITS_32((uint32_t)(bitlen >> 32)); - s[23] = REVERSE_BITS_32((uint32_t)bitlen); + s[22] = REVERSE_BYTES_32((uint32_t)(bitlen >> 32)); + s[23] = REVERSE_BYTES_32((uint32_t)bitlen); sm3_compress(s); // stores `s` in `hash` in big-endian diff --git a/benchmarks/test/test_hash_sm3.c b/benchmarks/test/test_hash_sm3.c index 6d098323..43434606 100644 --- a/benchmarks/test/test_hash_sm3.c +++ b/benchmarks/test/test_hash_sm3.c @@ -136,7 +136,9 @@ int main(int argc, char **argv) { {0x92, 0xA2, 0x58, 0x45, 0x27, 0x64, 0x32, 0x98, 0xF8, 0xE6, 0x65, 0xCE, 0xE4, 0x25, 0x4C, 0xAF, 0x1D, 0xC0, 0xA4, 0xAF, 0xFA, 0x23, 0x69, 0xED, 0x9F, 0xBA, 0x6E, 0xDF, 0x63, 0x69, 0xCE, 0x9B}, - {0xd9, 0x20, 0xd8, 0x2c, 0xc4, 0x3a, 0xb3, 0x85, 0x2f, 0x0d, 0x8b, 0x21, 0xaa, 0xc7, 0xde, 0xee, 0x78, 0xd6, 0xdd, 0x70, 0xd3, 0x04, 0x39, 0xdb, 0x47, 0xdc, 0x7b, 0x59, 0xb8, 0x5c, 0x34, 0x65}, + {0x04, 0xA1, 0x68, 0x86, 0x3B, 0x4B, 0x3B, 0x17, 0x11, 0xB7, 0x60, + 0x9A, 0xEA, 0x16, 0xC3, 0xC0, 0xC2, 0x5A, 0x0A, 0xC1, 0xF4, 0x74, + 0xE1, 0x7F, 0x4F, 0x3C, 0xBE, 
0xAD, 0xE6, 0x68, 0x1D, 0xE9}, }; for (int i = 0; i < TEST_COUNT; i++) { @@ -152,7 +154,7 @@ int main(int argc, char **argv) { printf("#\n# test %d/%d\n", i, TEST_COUNT); - printf("input_len = %lu\n", message_lengths[i]); + printf("input_len = %lu\n", (long unsigned int)message_lengths[i]); printf("input_data = "); puthex_py(messages[i], message_lengths[i]); @@ -174,7 +176,7 @@ int main(int argc, char **argv) { puthex_py(expected_digests[i], 32); printf("\n"); - printf("if( expected_digest != expected_digest ):\n"); + printf("if( actual_digest != expected_digest ):\n"); printf(" print(\"Test %d failed.\")\n", i); printf( " print( 'input == %%s' %% ( binascii.b2a_hex( input_data ) ) )" From c6cb8aee959388a898e1cd2b4512a7553fdbcccd Mon Sep 17 00:00:00 2001 From: Pau Kaifler Date: Tue, 16 Feb 2021 11:56:16 +0100 Subject: [PATCH 6/6] benchmarks: use GREV and ROL for RV64 --- benchmarks/sm3/reference/sm3.c | 7 +++---- benchmarks/sm3/zscrypto_rv64/sm3.c | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/benchmarks/sm3/reference/sm3.c b/benchmarks/sm3/reference/sm3.c index ad9af8d3..ee000105 100644 --- a/benchmarks/sm3/reference/sm3.c +++ b/benchmarks/sm3/reference/sm3.c @@ -3,17 +3,16 @@ #include #include "riscvcrypto/sm3/api_sm3.h" +#include "rvintrin.h" // The block size in bytes #define SM3_BLOCK_SIZE (16 * sizeof(uint32_t)) // Reverses the byte order of `V` -#define REVERSE_BYTES_32(V) \ - (((V & 0x000000FF) << 24) | (((V)&0x0000FF00) << 8) | \ - (((V)&0x00FF0000) >> 8) | (((V)&0xFF000000) >> 24)) +#define REVERSE_BYTES_32(V) (_rv32_grev((V), 0x18)) // Rotates `V` by `N` bits to the left -#define SM3_ROTATE_32(V, N) (((V) << (N)) | ((V) >> (32 - (N)))) +#define SM3_ROTATE_32(V, N) (_rv32_rol((V), (N))) // The two permutation functions #define SM3_P0(X) ((X) ^ SM3_ROTATE_32((X), 9) ^ SM3_ROTATE_32((X), 17)) diff --git a/benchmarks/sm3/zscrypto_rv64/sm3.c b/benchmarks/sm3/zscrypto_rv64/sm3.c index a236c5d5..e6bf1b99 100644 --- 
a/benchmarks/sm3/zscrypto_rv64/sm3.c +++ b/benchmarks/sm3/zscrypto_rv64/sm3.c @@ -4,17 +4,16 @@ #include "riscvcrypto/share/riscv-crypto-intrinsics.h" #include "riscvcrypto/sm3/api_sm3.h" +#include "rvintrin.h" // The block size in bytes #define SM3_BLOCK_SIZE (16 * sizeof(uint32_t)) // Reverses the byte order of `V` -#define REVERSE_BYTES_32(V) \ - (((V & 0x000000FF) << 24) | (((V)&0x0000FF00) << 8) | \ - (((V)&0x00FF0000) >> 8) | (((V)&0xFF000000) >> 24)) +#define REVERSE_BYTES_32(V) (_rv32_grev((V), 0x18)) // Rotates `V` by `N` bits to the left -#define SM3_ROTATE_32(V, N) (((V) << (N)) | ((V) >> (32 - (N)))) +#define SM3_ROTATE_32(V, N) (_rv32_rol((V), (N))) // The two permutation functions #define SM3_P0(X) _sm3p0((X))