From cc456814c58bf6c28ffbc281c93677f98d774c56 Mon Sep 17 00:00:00 2001 From: Amir Abrams Date: Tue, 4 Sep 2018 03:30:06 -0500 Subject: [PATCH] Add Dynamic Argon2d algorithm to YIIMP Stratum Pool (#301) - Use dynode instead of masternode for Dynamic - Updates Argon2d hash library (https://github.com/P-H-C/phc-winner-argon2) - Adds argon2d-dyn.conf sample template file - Code contributed by @BigEvilSoloMiner from Discord. - testing and contributions by @R3D5H1RT from Discord - testing and support by @InsaneITGeek from Discord. - Dynamic test stratum pool: https://testpool.dualityblocks.com --- .gitignore | 3 +- stratum/algos/ar2/argon2.c | 593 ++++++++++------- stratum/algos/ar2/argon2.h | 351 +++++----- stratum/algos/ar2/bench.c | 111 ---- stratum/algos/ar2/blake2/blamka-round-opt.h | 162 ----- stratum/algos/ar2/blake2b.c | 305 --------- stratum/algos/ar2/core.c | 615 ++++++++++++++++++ stratum/algos/ar2/{cores.h => core.h} | 118 ++-- stratum/algos/ar2/cores.c | 341 ---------- stratum/algos/ar2/encoding.c | 455 +++++++++++++ stratum/algos/ar2/encoding.h | 57 ++ stratum/algos/ar2/genkat.c | 182 ------ stratum/algos/ar2/genkat.h | 45 -- stratum/algos/ar2/opt.c | 315 +++++---- stratum/algos/ar2/opt.h | 49 -- stratum/algos/ar2/ref.c | 174 ----- stratum/algos/ar2/ref.h | 49 -- stratum/algos/ar2/run.c | 223 ------- stratum/algos/ar2/thread.c | 57 ++ stratum/algos/ar2/thread.h | 67 ++ stratum/algos/argon2a.c | 7 +- stratum/algos/argon2a.h | 2 +- stratum/algos/argon2d-dyn.c | 43 ++ stratum/algos/argon2d-dyn.h | 16 + stratum/algos/{ar2 => }/blake2/blake2-impl.h | 23 +- stratum/algos/{ar2 => }/blake2/blake2.h | 27 +- stratum/algos/blake2/blake2b.c | 390 +++++++++++ stratum/algos/blake2/blamka-round-opt.h | 476 ++++++++++++++ .../algos/{ar2 => }/blake2/blamka-round-ref.h | 19 +- stratum/algos/makefile | 6 +- stratum/coinbase.cpp | 63 ++ stratum/config.sample/argon2d-dyn.conf | 15 + stratum/stratum.cpp | 3 +- stratum/stratum.h | 2 +- web/yaamp/core/functions/yaamp.php | 4 + 35 
files changed, 3131 insertions(+), 2237 deletions(-) delete mode 100644 stratum/algos/ar2/bench.c delete mode 100644 stratum/algos/ar2/blake2/blamka-round-opt.h delete mode 100644 stratum/algos/ar2/blake2b.c create mode 100644 stratum/algos/ar2/core.c rename stratum/algos/ar2/{cores.h => core.h} (62%) delete mode 100644 stratum/algos/ar2/cores.c create mode 100644 stratum/algos/ar2/encoding.c create mode 100644 stratum/algos/ar2/encoding.h delete mode 100644 stratum/algos/ar2/genkat.c delete mode 100644 stratum/algos/ar2/genkat.h delete mode 100644 stratum/algos/ar2/opt.h delete mode 100644 stratum/algos/ar2/ref.c delete mode 100644 stratum/algos/ar2/ref.h delete mode 100644 stratum/algos/ar2/run.c create mode 100644 stratum/algos/ar2/thread.c create mode 100644 stratum/algos/ar2/thread.h create mode 100644 stratum/algos/argon2d-dyn.c create mode 100644 stratum/algos/argon2d-dyn.h rename stratum/algos/{ar2 => }/blake2/blake2-impl.h (82%) rename stratum/algos/{ar2 => }/blake2/blake2.h (70%) create mode 100644 stratum/algos/blake2/blake2b.c create mode 100644 stratum/algos/blake2/blamka-round-opt.h rename stratum/algos/{ar2 => }/blake2/blamka-round-ref.h (75%) create mode 100644 stratum/config.sample/argon2d-dyn.conf diff --git a/.gitignore b/.gitignore index ee8ea0277..0e268bfdc 100644 --- a/.gitignore +++ b/.gitignore @@ -19,4 +19,5 @@ web/assets/ *.rej *.orig .idea/* -web/yaamp/.idea/ \ No newline at end of file +web/yaamp/.idea/ +*.0 diff --git a/stratum/algos/ar2/argon2.c b/stratum/algos/ar2/argon2.c index 58ef79faf..665f8b7f2 100644 --- a/stratum/algos/ar2/argon2.c +++ b/stratum/algos/ar2/argon2.c @@ -1,279 +1,378 @@ /* - * Argon2 source code package + * Argon2 reference source code package - reference C implementations * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. 
+ * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. */ -#include #include +#include #include -#include #include "argon2.h" -#include "cores.h" - -/* Error messages */ -static const char *Argon2_ErrorMessage[] = { - /*{ARGON2_OK, */ "OK", - /*}, - - {ARGON2_OUTPUT_PTR_NULL, */ "Output pointer is NULL", - /*}, - -{ARGON2_OUTPUT_TOO_SHORT, */ "Output is too short", - /*}, -{ARGON2_OUTPUT_TOO_LONG, */ "Output is too long", - /*}, - -{ARGON2_PWD_TOO_SHORT, */ "Password is too short", - /*}, -{ARGON2_PWD_TOO_LONG, */ "Password is too long", - /*}, - -{ARGON2_SALT_TOO_SHORT, */ "Salt is too short", - /*}, -{ARGON2_SALT_TOO_LONG, */ "Salt is too long", - /*}, - -{ARGON2_AD_TOO_SHORT, */ "Associated data is too short", - /*}, -{ARGON2_AD_TOO_LONG, */ "Associated date is too long", - /*}, - -{ARGON2_SECRET_TOO_SHORT, */ "Secret is too short", - /*}, -{ARGON2_SECRET_TOO_LONG, */ "Secret is too long", - /*}, - -{ARGON2_TIME_TOO_SMALL, */ "Time cost is too small", - /*}, -{ARGON2_TIME_TOO_LARGE, */ "Time cost is too large", - /*}, - -{ARGON2_MEMORY_TOO_LITTLE, */ "Memory cost is too small", - /*}, -{ARGON2_MEMORY_TOO_MUCH, */ "Memory cost is too large", - /*}, - -{ARGON2_LANES_TOO_FEW, */ "Too few lanes", - /*}, -{ARGON2_LANES_TOO_MANY, */ "Too many lanes", - /*}, - -{ARGON2_PWD_PTR_MISMATCH, */ "Password pointer is NULL, but password length is not 0", - /*}, -{ARGON2_SALT_PTR_MISMATCH, */ "Salt pointer is NULL, but salt length is not 0", - 
/*}, -{ARGON2_SECRET_PTR_MISMATCH, */ "Secret pointer is NULL, but secret length is not 0", - /*}, -{ARGON2_AD_PTR_MISMATCH, */ "Associated data pointer is NULL, but ad length is not 0", - /*}, - -{ARGON2_MEMORY_ALLOCATION_ERROR, */ "Memory allocation error", - /*}, - -{ARGON2_FREE_MEMORY_CBK_NULL, */ "The free memory callback is NULL", - /*}, -{ARGON2_ALLOCATE_MEMORY_CBK_NULL, */ "The allocate memory callback is NULL", - /*}, - -{ARGON2_INCORRECT_PARAMETER, */ "Argon2_Context context is NULL", - /*}, -{ARGON2_INCORRECT_TYPE, */ "There is no such version of Argon2", - /*}, - -{ARGON2_OUT_PTR_MISMATCH, */ "Output pointer mismatch", - /*}, - -{ARGON2_THREADS_TOO_FEW, */ "Not enough threads", - /*}, -{ARGON2_THREADS_TOO_MANY, */ "Too many threads", - /*}, -{ARGON2_MISSING_ARGS, */ "Missing arguments", /*},*/ -}; - -int argon2d(argon2_context *context) { return argon2_core(context, Argon2_d); } - -int argon2i(argon2_context *context) { return argon2_core(context, Argon2_i); } - -int verify_d(argon2_context *context, const char *hash) { - int result; - /*if (0 == context->outlen || NULL == hash) { - return ARGON2_OUT_PTR_MISMATCH; - }*/ +#include "encoding.h" +#include "core.h" + +const char *argon2_type2string(argon2_type type, int uppercase) { + switch (type) { + case Argon2_d: + return uppercase ? "Argon2d" : "argon2d"; + } + + return NULL; +} + +int argon2_ctx(argon2_context *context, argon2_type type) { + /* 1. Validate all inputs */ + int result = validate_inputs(context); + uint32_t memory_blocks, segment_length; + argon2_instance_t instance; + + if (ARGON2_OK != result) { + return result; + } + + if (Argon2_d != type) { + return ARGON2_INCORRECT_TYPE; + } + + /* 2. 
Align memory size */ + /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ + memory_blocks = context->m_cost; + + if (memory_blocks < 2 * ARGON2_SYNC_POINTS * context->lanes) { + memory_blocks = 2 * ARGON2_SYNC_POINTS * context->lanes; + } + + segment_length = memory_blocks / (context->lanes * ARGON2_SYNC_POINTS); + /* Ensure that all segments have equal length */ + memory_blocks = segment_length * (context->lanes * ARGON2_SYNC_POINTS); + + instance.memory = NULL; + instance.passes = context->t_cost; + instance.memory_blocks = memory_blocks; + instance.segment_length = segment_length; + instance.lane_length = segment_length * ARGON2_SYNC_POINTS; + instance.lanes = context->lanes; + instance.threads = context->threads; + instance.type = type; + + if (instance.threads > instance.lanes) { + instance.threads = instance.lanes; + } - result = argon2_core(context, Argon2_d); + /* 3. Initialization: Hashing inputs, allocating memory, filling first + * blocks + */ + result = initialize(&instance, context); if (ARGON2_OK != result) { return result; } - return 0 == memcmp(hash, context->out, 32); + /* 4. Filling memory */ + result = fill_memory_blocks(&instance); + + if (ARGON2_OK != result) { + return result; + } + /* 5. 
Finalization */ + finalize(context, &instance); + + return ARGON2_OK; } -const char *error_message(int error_code) { - enum { - /* Make sure---at compile time---that the enum size matches the array - size */ - ERROR_STRING_CHECK = - 1 / - !!((sizeof(Argon2_ErrorMessage) / sizeof(Argon2_ErrorMessage[0])) == - ARGON2_ERROR_CODES_LENGTH) - }; - if (error_code < ARGON2_ERROR_CODES_LENGTH) { - return Argon2_ErrorMessage[(argon2_error_codes)error_code]; +int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, const size_t saltlen, + void *hash, const size_t hashlen, char *encoded, + const size_t encodedlen, argon2_type type){ + + argon2_context context; + int result; + uint8_t *out; + + if (pwdlen > ARGON2_MAX_PWD_LENGTH) { + return ARGON2_PWD_TOO_LONG; + } + + if (saltlen > ARGON2_MAX_SALT_LENGTH) { + return ARGON2_SALT_TOO_LONG; + } + + if (hashlen > ARGON2_MAX_OUTLEN) { + return ARGON2_OUTPUT_TOO_LONG; + } + + if (hashlen < ARGON2_MIN_OUTLEN) { + return ARGON2_OUTPUT_TOO_SHORT; + } + + out = malloc(hashlen); + if (!out) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + context.out = (uint8_t *)out; + context.outlen = (uint32_t)hashlen; + context.pwd = CONST_CAST(uint8_t *)pwd; + context.pwdlen = (uint32_t)pwdlen; + context.salt = CONST_CAST(uint8_t *)salt; + context.saltlen = (uint32_t)saltlen; + context.secret = NULL; + context.secretlen = 0; + context.ad = NULL; + context.adlen = 0; + context.t_cost = t_cost; + context.m_cost = m_cost; + context.lanes = parallelism; + context.threads = parallelism; + context.allocate_cbk = NULL; + context.free_cbk = NULL; + context.flags = ARGON2_DEFAULT_FLAGS; + + result = argon2_ctx(&context, type); + + if (result != ARGON2_OK) { + clear_internal_memory(out, hashlen); + free(out); + return result; + } + + /* if raw hash requested, write it */ + if (hash) { + memcpy(hash, out, hashlen); + } + + /* if encoding requested, write it */ + if 
(encoded && encodedlen) { + if (encode_string(encoded, encodedlen, &context, type) != ARGON2_OK) { + clear_internal_memory(out, hashlen); /* wipe buffers if error */ + clear_internal_memory(encoded, encodedlen); + free(out); + return ARGON2_ENCODING_FAIL; + } } - return "Unknown error code."; + clear_internal_memory(out, hashlen); + free(out); + + return ARGON2_OK; } -/* encoding/decoding helpers */ +int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, const size_t hashlen, + char *encoded, const size_t encodedlen) { -/* - * Some macros for constant-time comparisons. These work over values in - * the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true". - */ -#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF) -#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF) -#define GE(x, y) (GT(y, x) ^ 0xFF) -#define LT(x, y) GT(y, x) -#define LE(x, y) GE(y, x) + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + NULL, hashlen, encoded, encodedlen, Argon2_d); +} -/* - * Convert value x (0..63) to corresponding Base64 character. - */ -static int b64_byte_to_char(unsigned x) { - return (LT(x, 26) & (x + 'A')) | - (GE(x, 26) & LT(x, 52) & (x + ('a' - 26))) | - (GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+') | - (EQ(x, 63) & '/'); +int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, const size_t hashlen) { + + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + hash, hashlen, NULL, 0, Argon2_d); } -/* - * Convert some bytes to Base64. 
'dst_len' is the length (in characters) - * of the output buffer 'dst'; if that buffer is not large enough to - * receive the result (including the terminating 0), then (size_t)-1 - * is returned. Otherwise, the zero-terminated Base64 string is written - * in the buffer, and the output length (counted WITHOUT the terminating - * zero) is returned. - */ -static size_t to_base64(char *dst, size_t dst_len, const void *src) { - size_t olen; - const unsigned char *buf; - unsigned acc, acc_len; - - olen = 43; - /*switch (32 % 3) { - case 2: - olen++;*/ - /* fall through */ - /*case 1: - olen += 2; - break; - }*/ - if (dst_len <= olen) { - return (size_t)-1; +static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) { + size_t i; + uint8_t d = 0U; + + for (i = 0U; i < len; i++) { + d |= b1[i] ^ b2[i]; } - acc = 0; - acc_len = 0; - buf = (const unsigned char *)src; - size_t src_len = 32; - while (src_len-- > 0) { - acc = (acc << 8) + (*buf++); - acc_len += 8; - while (acc_len >= 6) { - acc_len -= 6; - *dst++ = b64_byte_to_char((acc >> acc_len) & 0x3F); - } + return (int)((1 & ((d - 1) >> 8)) - 1); +} + +int argon2_verify(const char *encoded, const void *pwd, const size_t pwdlen, + argon2_type type) { + + argon2_context ctx; + uint8_t *desired_result = NULL; + + int ret = ARGON2_OK; + + size_t encoded_len; + uint32_t max_field_len; + + if (pwdlen > ARGON2_MAX_PWD_LENGTH) { + return ARGON2_PWD_TOO_LONG; + } + + if (encoded == NULL) { + return ARGON2_DECODING_FAIL; + } + + encoded_len = strlen(encoded); + if (encoded_len > UINT32_MAX) { + return ARGON2_DECODING_FAIL; + } + + /* No field can be longer than the encoded length */ + max_field_len = (uint32_t)encoded_len; + + ctx.saltlen = max_field_len; + ctx.outlen = max_field_len; + + ctx.salt = malloc(ctx.saltlen); + ctx.out = malloc(ctx.outlen); + if (!ctx.salt || !ctx.out) { + ret = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + ctx.pwd = (uint8_t *)pwd; + ctx.pwdlen = (uint32_t)pwdlen; + + ret = 
decode_string(&ctx, encoded, type); + if (ret != ARGON2_OK) { + goto fail; } - if (acc_len > 0) { - *dst++ = b64_byte_to_char((acc << (6 - acc_len)) & 0x3F); + + /* Set aside the desired result, and get a new buffer. */ + desired_result = ctx.out; + ctx.out = malloc(ctx.outlen); + if (!ctx.out) { + ret = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; } - *dst++ = 0; - return olen; + + ret = argon2_verify_ctx(&ctx, (char *)desired_result, type); + if (ret != ARGON2_OK) { + goto fail; + } + +fail: + free(ctx.salt); + free(ctx.out); + free(desired_result); + + return ret; } -/* ==================================================================== */ -/* - * Code specific to Argon2i. - * - * The code below applies the following format: - * - * $argon2i$m=,t=,p=[,keyid=][,data=][$[$]] - * - * where is a decimal integer (positive, fits in an 'unsigned long') - * and is Base64-encoded data (no '=' padding characters, no newline - * or whitespace). The "keyid" is a binary identifier for a key (up to 8 - * bytes); "data" is associated data (up to 32 bytes). When the 'keyid' - * (resp. the 'data') is empty, then it is ommitted from the output. - * - * The last two binary chunks (encoded in Base64) are, in that order, - * the salt and the output. Both are optional, but you cannot have an - * output without a salt. The binary salt length is between 8 and 48 bytes. - * The output length is always exactly 32 bytes. 
- */ +int argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) { -int encode_string(char *dst, size_t dst_len, argon2_context *ctx) { -#define SS(str) \ - do { \ - size_t pp_len = strlen(str); \ - if (pp_len >= dst_len) { \ - return 0; \ - } \ - memcpy(dst, str, pp_len + 1); \ - dst += pp_len; \ - dst_len -= pp_len; \ - } while (0) - -#define SX(x) \ - do { \ - char tmp[30]; \ - sprintf(tmp, "%lu", (unsigned long)(x)); \ - SS(tmp); \ - } while (0); - -#define SB(buf) \ - do { \ - size_t sb_len = to_base64(dst, dst_len, buf); \ - if (sb_len == (size_t)-1) { \ - return 0; \ - } \ - dst += sb_len; \ - dst_len -= sb_len; \ - } while (0); - - SS("$argon2i$m="); - SX(16); - SS(",t="); - SX(2); - SS(",p="); - SX(1); - - /*if (ctx->adlen > 0) { - SS(",data="); - SB(ctx->ad, ctx->adlen); - }*/ - - /*if (ctx->saltlen == 0) - return 1;*/ - - SS("$"); - SB(ctx->salt); - - /*if (ctx->outlen32 == 0) - return 1;*/ - - SS("$"); - SB(ctx->out); - return 1; - -#undef SS -#undef SX -#undef SB + return argon2_verify(encoded, pwd, pwdlen, Argon2_d); } + +int argon2d_ctx(argon2_context *context) { + return argon2_ctx(context, Argon2_d); +} + +int argon2_verify_ctx(argon2_context *context, const char *hash, + argon2_type type) { + int ret = argon2_ctx(context, type); + if (ret != ARGON2_OK) { + return ret; + } + + if (argon2_compare((uint8_t *)hash, context->out, context->outlen)) { + return ARGON2_VERIFY_MISMATCH; + } + + return ARGON2_OK; +} + +int argon2d_verify_ctx(argon2_context *context, const char *hash) { + return argon2_verify_ctx(context, hash, Argon2_d); +} + +const char *argon2_error_message(int error_code) { + switch (error_code) { + case ARGON2_OK: + return "OK"; + case ARGON2_OUTPUT_PTR_NULL: + return "Output pointer is NULL"; + case ARGON2_OUTPUT_TOO_SHORT: + return "Output is too short"; + case ARGON2_OUTPUT_TOO_LONG: + return "Output is too long"; + case ARGON2_PWD_TOO_SHORT: + return "Password is too short"; + case ARGON2_PWD_TOO_LONG: + return 
"Password is too long"; + case ARGON2_SALT_TOO_SHORT: + return "Salt is too short"; + case ARGON2_SALT_TOO_LONG: + return "Salt is too long"; + case ARGON2_AD_TOO_SHORT: + return "Associated data is too short"; + case ARGON2_AD_TOO_LONG: + return "Associated data is too long"; + case ARGON2_SECRET_TOO_SHORT: + return "Secret is too short"; + case ARGON2_SECRET_TOO_LONG: + return "Secret is too long"; + case ARGON2_TIME_TOO_SMALL: + return "Time cost is too small"; + case ARGON2_TIME_TOO_LARGE: + return "Time cost is too large"; + case ARGON2_MEMORY_TOO_LITTLE: + return "Memory cost is too small"; + case ARGON2_MEMORY_TOO_MUCH: + return "Memory cost is too large"; + case ARGON2_LANES_TOO_FEW: + return "Too few lanes"; + case ARGON2_LANES_TOO_MANY: + return "Too many lanes"; + case ARGON2_PWD_PTR_MISMATCH: + return "Password pointer is NULL, but password length is not 0"; + case ARGON2_SALT_PTR_MISMATCH: + return "Salt pointer is NULL, but salt length is not 0"; + case ARGON2_SECRET_PTR_MISMATCH: + return "Secret pointer is NULL, but secret length is not 0"; + case ARGON2_AD_PTR_MISMATCH: + return "Associated data pointer is NULL, but ad length is not 0"; + case ARGON2_MEMORY_ALLOCATION_ERROR: + return "Memory allocation error"; + case ARGON2_FREE_MEMORY_CBK_NULL: + return "The free memory callback is NULL"; + case ARGON2_ALLOCATE_MEMORY_CBK_NULL: + return "The allocate memory callback is NULL"; + case ARGON2_INCORRECT_PARAMETER: + return "Argon2_Context context is NULL"; + case ARGON2_INCORRECT_TYPE: + return "There is no such version of Argon2"; + case ARGON2_OUT_PTR_MISMATCH: + return "Output pointer mismatch"; + case ARGON2_THREADS_TOO_FEW: + return "Not enough threads"; + case ARGON2_THREADS_TOO_MANY: + return "Too many threads"; + case ARGON2_MISSING_ARGS: + return "Missing arguments"; + case ARGON2_ENCODING_FAIL: + return "Encoding failed"; + case ARGON2_DECODING_FAIL: + return "Decoding failed"; + case ARGON2_THREAD_FAIL: + return "Threading failure"; + case 
ARGON2_DECODING_LENGTH_FAIL: + return "Some of encoded parameters are too long or too short"; + case ARGON2_VERIFY_MISMATCH: + return "The password does not match the supplied hash"; + default: + return "Unknown error code"; + } +} + +size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism, + uint32_t saltlen, uint32_t hashlen, argon2_type type) { + return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) + + numlen(t_cost) + numlen(m_cost) + numlen(parallelism) + + b64len(saltlen) + b64len(hashlen); +} \ No newline at end of file diff --git a/stratum/algos/ar2/argon2.h b/stratum/algos/ar2/argon2.h index cecb2c7c9..35e524303 100644 --- a/stratum/algos/ar2/argon2.h +++ b/stratum/algos/ar2/argon2.h @@ -1,18 +1,27 @@ /* - * Argon2 source code package + * Argon2 reference source code package - reference C implementations * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. 
*/ + #ifndef ARGON2_H #define ARGON2_H +#if defined(HAVE_CONFIG_H) +#include "config/dynamic-config.h" +#endif + #include #include #include @@ -21,8 +30,18 @@ extern "C" { #endif -/*************************Argon2 input parameter - * restrictions**************************************************/ +/* Symbols visibility control */ +#ifdef A2_VISCTL +#define ARGON2_PUBLIC __attribute__((visibility("default"))) +#elif _MSC_VER +#define ARGON2_PUBLIC __declspec(dllexport) +#else +#define ARGON2_PUBLIC +#endif + +/* + * Argon2 input parameter restrictions + */ /* Minimum and maximum number of lanes (degree of parallelism) */ #define ARGON2_MIN_LANES UINT32_C(1) @@ -43,8 +62,7 @@ extern "C" { #define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */ #define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b)) -/* Max memory size is half the addressing space, topping at 2^32 blocks (4 TB) - */ +/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */ #define ARGON2_MAX_MEMORY_BITS \ ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1)) #define ARGON2_MAX_MEMORY \ @@ -70,65 +88,74 @@ extern "C" { #define ARGON2_MIN_SECRET UINT32_C(0) #define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF) +/* Flags to determine which fields are securely wiped (default = no wipe). */ +#define ARGON2_DEFAULT_FLAGS UINT32_C(0) #define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0) #define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1) -#define ARGON2_FLAG_CLEAR_MEMORY (UINT32_C(1) << 2) -#define ARGON2_DEFAULT_FLAGS \ - (ARGON2_FLAG_CLEAR_PASSWORD | ARGON2_FLAG_CLEAR_MEMORY) + +/* Global flag to determine if we are wiping internal memory buffers. This flag + * is defined in core.c and defaults to 1 (wipe internal memory).
*/ +extern int FLAG_clear_internal_memory; /* Error codes */ typedef enum Argon2_ErrorCodes { ARGON2_OK = 0, - ARGON2_OUTPUT_PTR_NULL = 1, + ARGON2_OUTPUT_PTR_NULL = -1, + + ARGON2_OUTPUT_TOO_SHORT = -2, + ARGON2_OUTPUT_TOO_LONG = -3, + + ARGON2_PWD_TOO_SHORT = -4, + ARGON2_PWD_TOO_LONG = -5, + + ARGON2_SALT_TOO_SHORT = -6, + ARGON2_SALT_TOO_LONG = -7, + + ARGON2_AD_TOO_SHORT = -8, + ARGON2_AD_TOO_LONG = -9, - ARGON2_OUTPUT_TOO_SHORT = 2, - ARGON2_OUTPUT_TOO_LONG = 3, + ARGON2_SECRET_TOO_SHORT = -10, + ARGON2_SECRET_TOO_LONG = -11, - ARGON2_PWD_TOO_SHORT = 4, - ARGON2_PWD_TOO_LONG = 5, + ARGON2_TIME_TOO_SMALL = -12, + ARGON2_TIME_TOO_LARGE = -13, - ARGON2_SALT_TOO_SHORT = 6, - ARGON2_SALT_TOO_LONG = 7, + ARGON2_MEMORY_TOO_LITTLE = -14, + ARGON2_MEMORY_TOO_MUCH = -15, - ARGON2_AD_TOO_SHORT = 8, - ARGON2_AD_TOO_LONG = 9, + ARGON2_LANES_TOO_FEW = -16, + ARGON2_LANES_TOO_MANY = -17, - ARGON2_SECRET_TOO_SHORT = 10, - ARGON2_SECRET_TOO_LONG = 11, + ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */ + ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */ + ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */ + ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */ - ARGON2_TIME_TOO_SMALL = 12, - ARGON2_TIME_TOO_LARGE = 13, + ARGON2_MEMORY_ALLOCATION_ERROR = -22, - ARGON2_MEMORY_TOO_LITTLE = 14, - ARGON2_MEMORY_TOO_MUCH = 15, + ARGON2_FREE_MEMORY_CBK_NULL = -23, + ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24, - ARGON2_LANES_TOO_FEW = 16, - ARGON2_LANES_TOO_MANY = 17, + ARGON2_INCORRECT_PARAMETER = -25, + ARGON2_INCORRECT_TYPE = -26, - ARGON2_PWD_PTR_MISMATCH = 18, /* NULL ptr with non-zero length */ - ARGON2_SALT_PTR_MISMATCH = 19, /* NULL ptr with non-zero length */ - ARGON2_SECRET_PTR_MISMATCH = 20, /* NULL ptr with non-zero length */ - ARGON2_AD_PTR_MISMATCH = 21, /* NULL ptr with non-zero length */ + ARGON2_OUT_PTR_MISMATCH = -27, - ARGON2_MEMORY_ALLOCATION_ERROR = 22, + ARGON2_THREADS_TOO_FEW = -28, + 
ARGON2_THREADS_TOO_MANY = -29, - ARGON2_FREE_MEMORY_CBK_NULL = 23, - ARGON2_ALLOCATE_MEMORY_CBK_NULL = 24, + ARGON2_MISSING_ARGS = -30, - ARGON2_INCORRECT_PARAMETER = 25, - ARGON2_INCORRECT_TYPE = 26, + ARGON2_ENCODING_FAIL = -31, - ARGON2_OUT_PTR_MISMATCH = 27, + ARGON2_DECODING_FAIL = -32, - ARGON2_THREADS_TOO_FEW = 28, - ARGON2_THREADS_TOO_MANY = 29, + ARGON2_THREAD_FAIL = -33, - ARGON2_MISSING_ARGS = 30, + ARGON2_DECODING_LENGTH_FAIL = -34, - ARGON2_ERROR_CODES_LENGTH /* Do NOT remove; Do NOT add error codes after - this - error code */ + ARGON2_VERIFY_MISMATCH = -35 } argon2_error_codes; /* Memory allocator types --- for external allocation */ @@ -138,155 +165,173 @@ typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate); /* Argon2 external data structures */ /* - *****Context: structure to hold Argon2 inputs: - * output array and its length, - * password and its length, - * salt and its length, - * secret and its length, - * associated data and its length, - * number of passes, amount of used memory (in KBytes, can be rounded up a bit) - * number of parallel threads that will be run. + ***** + * Context: structure to hold Argon2 inputs: + * output array and its length, + * password and its length, + * salt and its length, + * secret and its length, + * associated data and its length, + * number of passes, amount of used memory (in KBytes, can be rounded up a bit) + * number of parallel threads that will be run. * All the parameters above affect the output hash value. * Additionally, two function pointers can be provided to allocate and - deallocate the memory (if NULL, memory will be allocated internally). + * deallocate the memory (if NULL, memory will be allocated internally). 
* Also, three flags indicate whether to erase password, secret as soon as they - are pre-hashed (and thus not needed anymore), and the entire memory - **************************** - Simplest situation: you have output array out[8], password is stored in - pwd[32], salt is stored in salt[16], you do not have keys nor associated data. - You need to spend 1 GB of RAM and you run 5 passes of Argon2d with 4 parallel - lanes. - You want to erase the password, but you're OK with last pass not being erased. - You want to use the default memory allocator. + * are pre-hashed (and thus not needed anymore), and the entire memory + ***** + * Simplest situation: you have output array out[8], password is stored in + * pwd[32], salt is stored in salt[16], you do not have keys nor associated + * data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with + * 4 parallel lanes. + * You want to erase the password, but you're OK with last pass not being + * erased. You want to use the default memory allocator. 
+ * Then you initialize: + Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false) */ typedef struct Argon2_Context { uint8_t *out; /* output array */ + uint32_t outlen; /* digest length */ + uint8_t *pwd; /* password array */ + uint32_t pwdlen; /* password length */ + uint8_t *salt; /* salt array */ - /*uint8_t *secret;*/ /* key array */ - /*uint8_t *ad;*/ /* associated data array */ + uint32_t saltlen; /* salt length */ + + uint8_t *secret; /* key array */ + uint32_t secretlen; /* key length */ + + uint8_t *ad; /* associated data array */ + uint32_t adlen; /* associated data length */ + + uint32_t t_cost; /* number of passes */ + uint32_t m_cost; /* amount of memory requested (KB) */ + uint32_t lanes; /* number of lanes */ + uint32_t threads; /* maximum number of threads */ allocate_fptr allocate_cbk; /* pointer to memory allocator */ deallocate_fptr free_cbk; /* pointer to memory deallocator */ - /*uint32_t outlen;*/ /* digest length */ - uint32_t pwdlen; /* password length */ - /*uint32_t saltlen;*/ /* salt length */ - /*uint32_t secretlen;*/ /* key length */ - /*uint32_t adlen;*/ /* associated data length */ - /*uint32_t t_cost;*/ /* number of passes */ - /*uint32_t m_cost;*/ /* amount of memory requested (KB) */ - /*uint32_t lanes;*/ /* number of lanes */ - /*uint32_t threads;*/ /* maximum number of threads */ - /*uint32_t flags;*/ /* array of bool options */ - + uint32_t flags; /* array of bool options */ } argon2_context; -/** - * Function to hash the inputs in the memory-hard fashion (uses Argon2i) - * @param out Pointer to the memory where the hash digest will be written - * @param outlen Digest length in bytes - * @param in Pointer to the input (password) - * @param inlen Input length in bytes - * @param salt Pointer to the salt - * @param saltlen Salt length in bytes - * @pre @a out must have at least @a outlen bytes allocated - * @pre @a in must be at least @inlen bytes long - * @pre @a saltlen must be at least 
@saltlen bytes long - * @return Zero if successful, 1 otherwise. - */ -/*int hash_argon2i(void *out, size_t outlen, const void *in, size_t inlen, - const void *salt, size_t saltlen, unsigned int t_cost, - unsigned int m_cost);*/ - -/* same for argon2d */ -/*int hash_argon2d(void *out, size_t outlen, const void *in, size_t inlen, - const void *salt, size_t saltlen, unsigned int t_cost, - unsigned int m_cost);*/ +/* Argon2 primitive type */ +typedef enum Argon2_type { + Argon2_d = 0 +} argon2_type; /* - * **************Argon2d: Version of Argon2 that picks memory blocks depending - * on the password and salt. Only for side-channel-free - * environment!!*************** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise + * Function that gives the string representation of an argon2_type. + * @param type The argon2_type that we want the string for + * @param uppercase Whether the string should have the first letter uppercase + * @return NULL if invalid type, otherwise the string representation. */ -int argon2d(argon2_context *context); +ARGON2_PUBLIC const char *argon2_type2string(argon2_type type, int uppercase); /* - * * **************Argon2i: Version of Argon2 that picks memory blocks - *independent on the password and salt. Good for side-channels, - ******************* but worse w.r.t. 
tradeoff attacks if - *******************only one pass is used*************** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise + * Function that performs memory-hard hashing with certain degree of parallelism + * @param context Pointer to the Argon2 internal structure + * @return Error code if something is wrong, ARGON2_OK otherwise */ -int argon2i(argon2_context *context); +ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type); -/* - * * **************Argon2di: Reserved name*************** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise +/** + * Hashes a password with Argon2d, producing a raw hash by allocating memory at + * @hash + * @param t_cost Number of iterations + * @param m_cost Sets memory usage to m_cost kibibytes + * @param parallelism Number of threads and compute lanes + * @param pwd Pointer to password + * @param pwdlen Password size in bytes + * @param salt Pointer to salt + * @param saltlen Salt size in bytes + * @param hash Buffer where to write the raw hash - updated by the function + * @param hashlen Desired length of the hash in bytes + * @pre Different parallelism levels will give different results + * @pre Returns ARGON2_OK if successful */ -int argon2di(argon2_context *context); +ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen); + +ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, + const void *pwd, const size_t pwdlen, + const void *salt, const size_t saltlen, + const size_t hashlen, char *encoded, + const size_t encodedlen); + +/* generic function underlying the above ones */ +ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t 
m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen, char *encoded, + const size_t encodedlen, argon2_type type); -/* - * * **************Argon2ds: Argon2d hardened against GPU attacks, 20% - * slower*************** - * @param context Pointer to current Argon2 context - * @return Zero if successful, a non zero error code otherwise +/** + * Verifies a password against an encoded string + * Encoded string is restricted as in validate_inputs() + * @param encoded String encoding parameters, salt, hash + * @param pwd Pointer to password + * @pre Returns ARGON2_OK if successful */ -int argon2ds(argon2_context *context); +ARGON2_PUBLIC int argon2d_verify(const char *encoded, const void *pwd, + const size_t pwdlen); -/* - * * **************Argon2id: First half-pass over memory is - *password-independent, the rest are password-dependent - ********************OK against side channels: they reduce to 1/2-pass - *Argon2i*************** +/* generic function underlying the above ones */ +ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd, + const size_t pwdlen, argon2_type type); + +/** + * Argon2d: Version of Argon2 that picks memory blocks depending + * on the password and salt. Only for side-channel-free + * environment!! + ***** * @param context Pointer to current Argon2 context * @return Zero if successful, a non zero error code otherwise */ -int argon2id(argon2_context *context); +ARGON2_PUBLIC int argon2d_ctx(argon2_context *context); -/* +/** * Verify if a given password is correct for Argon2d hashing * @param context Pointer to current Argon2 context * @param hash The password hash to verify. 
The length of the hash is * specified by the context outlen member * @return Zero if successful, a non zero error code otherwise */ -int verify_d(argon2_context *context, const char *hash); +ARGON2_PUBLIC int argon2d_verify_ctx(argon2_context *context, const char *hash); -/* +/* generic function underlying the above ones */ +ARGON2_PUBLIC int argon2_verify_ctx(argon2_context *context, const char *hash, + argon2_type type); + +/** * Get the associated error message for given error code * @return The error message associated with the given error code */ -const char *error_message(int error_code); +ARGON2_PUBLIC const char *argon2_error_message(int error_code); -/* ==================================================================== */ -/* - * Code specific to Argon2i. - * - * The code below applies the following format: - * - * $argon2i$m=,t=,p=[,keyid=][,data=][$[$]] - * - * where is a decimal integer (positive, fits in an 'unsigned long') - * and is Base64-encoded data (no '=' padding characters, no newline - * or whitespace). The "keyid" is a binary identifier for a key (up to 8 - * bytes); "data" is associated data (up to 32 bytes). When the 'keyid' - * (resp. the 'data') is empty, then it is ommitted from the output. - * - * The last two binary chunks (encoded in Base64) are, in that order, - * the salt and the output. Both are optional, but you cannot have an - * output without a salt. The binary salt length is between 8 and 48 bytes. - * The output length is always exactly 32 bytes. 
+/** + * Returns the encoded hash length for the given input parameters + * @param t_cost Number of iterations + * @param m_cost Memory usage in kibibytes + * @param parallelism Number of threads; used to compute lanes + * @param saltlen Salt size in bytes + * @param hashlen Hash size in bytes + * @param type The argon2_type that we want the encoded length for + * @return The encoded hash length in bytes */ - -int encode_string(char *dst, size_t dst_len, argon2_context *ctx); +ARGON2_PUBLIC size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, + uint32_t parallelism, uint32_t saltlen, + uint32_t hashlen, argon2_type type); #if defined(__cplusplus) } #endif -#endif +#endif \ No newline at end of file diff --git a/stratum/algos/ar2/bench.c b/stratum/algos/ar2/bench.c deleted file mode 100644 index 7a6edc5d3..000000000 --- a/stratum/algos/ar2/bench.c +++ /dev/null @@ -1,111 +0,0 @@ -#include -#include -#include -#include -#include -#ifdef _MSC_VER -#include -#endif - -#include "argon2.h" - -static uint64_t rdtsc(void) { -#ifdef _MSC_VER - return __rdtsc(); -#else -#if defined(__amd64__) || defined(__x86_64__) - uint64_t rax, rdx; - __asm__ __volatile__("rdtsc" : "=a"(rax), "=d"(rdx) : :); - return (rdx << 32) | rax; -#elif defined(__i386__) || defined(__i386) || defined(__X86__) - uint64_t rax; - __asm__ __volatile__("rdtsc" : "=A"(rax) : :); - return rax; -#else -#error "Not implemented!" 
-#endif -#endif -} - -/* - * Benchmarks Argon2 with salt length 16, password length 16, t_cost 1, - and different m_cost and threads - */ -static void benchmark() { -#define BENCH_OUTLEN 16 -#define BENCH_INLEN 16 - const uint32_t inlen = BENCH_INLEN; - const unsigned outlen = BENCH_OUTLEN; - unsigned char out[BENCH_OUTLEN]; - unsigned char pwd_array[BENCH_INLEN]; - unsigned char salt_array[BENCH_INLEN]; -#undef BENCH_INLEN -#undef BENCH_OUTLEN - - uint32_t t_cost = 1; - uint32_t m_cost; - uint32_t thread_test[6] = {1, 2, 4, 6, 8, 16}; - - memset(pwd_array, 0, inlen); - memset(salt_array, 1, inlen); - - for (m_cost = (uint32_t)1 << 10; m_cost <= (uint32_t)1 << 22; m_cost *= 2) { - unsigned i; - for (i = 0; i < 6; ++i) { - argon2_context context; - uint32_t thread_n = thread_test[i]; - uint64_t stop_cycles, stop_cycles_i; - clock_t stop_time; - uint64_t delta_d, delta_i; - double mcycles_d, mcycles_i, run_time; - - clock_t start_time = clock(); - uint64_t start_cycles = rdtsc(); - - context.out = out; - context.outlen = outlen; - context.pwd = pwd_array; - context.pwdlen = inlen; - context.salt = salt_array; - context.saltlen = inlen; - context.secret = NULL; - context.secretlen = 0; - context.ad = NULL; - context.adlen = 0; - context.t_cost = t_cost; - context.m_cost = m_cost; - context.lanes = thread_n; - context.threads = thread_n; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - context.flags = 0; - - argon2d(&context); - stop_cycles = rdtsc(); - argon2i(&context); - stop_cycles_i = rdtsc(); - stop_time = clock(); - - delta_d = (stop_cycles - start_cycles) / (m_cost); - delta_i = (stop_cycles_i - stop_cycles) / (m_cost); - mcycles_d = (double)(stop_cycles - start_cycles) / (1UL << 20); - mcycles_i = (double)(stop_cycles_i - stop_cycles) / (1UL << 20); - printf("Argon2d %d iterations %d MiB %d threads: %2.2f cpb %2.2f " - "Mcycles \n", - t_cost, m_cost >> 10, thread_n, (float)delta_d / 1024, - mcycles_d); - printf("Argon2i %d iterations %d MiB %d 
threads: %2.2f cpb %2.2f " - "Mcycles \n", - t_cost, m_cost >> 10, thread_n, (float)delta_i / 1024, - mcycles_i); - - run_time = ((double)stop_time - start_time) / (CLOCKS_PER_SEC); - printf("%2.4f seconds\n\n", run_time); - } - } -} - -int main() { - benchmark(); - return ARGON2_OK; -} diff --git a/stratum/algos/ar2/blake2/blamka-round-opt.h b/stratum/algos/ar2/blake2/blamka-round-opt.h deleted file mode 100644 index 690686d9e..000000000 --- a/stratum/algos/ar2/blake2/blamka-round-opt.h +++ /dev/null @@ -1,162 +0,0 @@ -#ifndef BLAKE_ROUND_MKA_OPT_H -#define BLAKE_ROUND_MKA_OPT_H - -#include "blake2-impl.h" - -#if defined(_MSC_VER) -#include -#endif - -#include -#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__)) -#include -#endif - -#if !defined(__XOP__) -#if defined(__SSSE3__) -#define r16 \ - (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) -#define r24 \ - (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) -#define _mm_roti_epi64(x, c) \ - (-(c) == 32) \ - ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ - : (-(c) == 24) \ - ? _mm_shuffle_epi8((x), r24) \ - : (-(c) == 16) \ - ? _mm_shuffle_epi8((x), r16) \ - : (-(c) == 63) \ - ? 
_mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_add_epi64((x), (x))) \ - : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_slli_epi64((x), 64 - (-(c)))) -#else /* defined(__SSE2__) */ -#define _mm_roti_epi64(r, c) \ - _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c)))) -#endif -#else -#endif - -static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) { - const __m128i z = _mm_mul_epu32(x, y); - return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); -} - -#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = fBlaMka(A0, B0); \ - A1 = fBlaMka(A1, B1); \ - \ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ - \ - D0 = _mm_roti_epi64(D0, -32); \ - D1 = _mm_roti_epi64(D1, -32); \ - \ - C0 = fBlaMka(C0, D0); \ - C1 = fBlaMka(C1, D1); \ - \ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ - \ - B0 = _mm_roti_epi64(B0, -24); \ - B1 = _mm_roti_epi64(B1, -24); \ - } while ((void)0, 0) - -#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - A0 = fBlaMka(A0, B0); \ - A1 = fBlaMka(A1, B1); \ - \ - D0 = _mm_xor_si128(D0, A0); \ - D1 = _mm_xor_si128(D1, A1); \ - \ - D0 = _mm_roti_epi64(D0, -16); \ - D1 = _mm_roti_epi64(D1, -16); \ - \ - C0 = fBlaMka(C0, D0); \ - C1 = fBlaMka(C1, D1); \ - \ - B0 = _mm_xor_si128(B0, C0); \ - B1 = _mm_xor_si128(B1, C1); \ - \ - B0 = _mm_roti_epi64(B0, -63); \ - B1 = _mm_roti_epi64(B1, -63); \ - } while ((void)0, 0) - -#if defined(__SSSE3__) -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ - __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ - B0 = t0; \ - B1 = t1; \ - \ - t0 = C0; \ - C0 = C1; \ - C1 = t0; \ - \ - t0 = _mm_alignr_epi8(D1, D0, 8); \ - t1 = _mm_alignr_epi8(D0, D1, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ - __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ - B0 = t0; \ - B1 = t1; \ - \ - t0 = 
C0; \ - C0 = C1; \ - C1 = t0; \ - \ - t0 = _mm_alignr_epi8(D0, D1, 8); \ - t1 = _mm_alignr_epi8(D1, D0, 8); \ - D0 = t1; \ - D1 = t0; \ - } while ((void)0, 0) -#else /* SSE2 */ -#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = D0; \ - __m128i t1 = B0; \ - D0 = C0; \ - C0 = C1; \ - C1 = D0; \ - D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \ - D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \ - B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \ - B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \ - } while ((void)0, 0) - -#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ - do { \ - __m128i t0 = C0; \ - C0 = C1; \ - C1 = t0; \ - t0 = B0; \ - __m128i t1 = D0; \ - B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \ - B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \ - D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \ - D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \ - } while ((void)0, 0) -#endif - -#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ - do { \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - G1(A0, B0, C0, D0, A1, B1, C1, D1); \ - G2(A0, B0, C0, D0, A1, B1, C1, D1); \ - \ - UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ - } while ((void)0, 0) - -#endif diff --git a/stratum/algos/ar2/blake2b.c b/stratum/algos/ar2/blake2b.c deleted file mode 100644 index be691b153..000000000 --- a/stratum/algos/ar2/blake2b.c +++ /dev/null @@ -1,305 +0,0 @@ -#include -#include -#include -#include - -#include "blake2/blake2.h" -#include "blake2/blake2-impl.h" - -static const uint64_t blake2b_IV[8] = { - UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), - UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), - UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), - UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) -}; - -static const unsigned int 
blake2b_sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, -}; - -static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) { - S->f[1] = (uint64_t)-1; -} - -static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) { - if (S->last_node) { - blake2b_set_lastnode(S); - } - S->f[0] = (uint64_t)-1; -} - -static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S, - uint64_t inc) { - S->t[0] += inc; - S->t[1] += (S->t[0] < inc); -} - -static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) { - burn(S, sizeof(*S)); /* wipe */ - blake2b_set_lastblock(S); /* invalidate for further use */ -} - -static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) { - memset(S, 0, sizeof(*S)); - memcpy(S->h, blake2b_IV, sizeof(S->h)); -} - - -/*void print_state(blake2b_state BlakeHash) { - printf(".h = {UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 "),\n" - "UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 "),\n" - "UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 "),\n" - "UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 ")},\n" - ".t = {UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 ")},\n" - ".f = {UINT64_C(%" PRIu64 "), UINT64_C(%" PRIu64 ")}\n", - BlakeHash.h[0], BlakeHash.h[1], BlakeHash.h[2], BlakeHash.h[3], - BlakeHash.h[4], BlakeHash.h[5], BlakeHash.h[6], BlakeHash.h[7], - BlakeHash.t[0], BlakeHash.t[1], - 
BlakeHash.f[0], BlakeHash.f[1]); - printf(".buf = {"); - for (register uint8_t i = 0; i < BLAKE2B_BLOCKBYTES; i++) - printf("%" PRIu8 ", ", BlakeHash.buf[i]); - puts("\n"); - printf("}\n.buflen = %d\n.outlen = %d\n", - BlakeHash.buflen, BlakeHash.outlen); - printf(".last_node = %" PRIu8 "\n", BlakeHash.last_node); - fflush(stdout); -}*/ - -static const blake2b_state miou = { - .h = { - UINT64_C(7640891576939301128), UINT64_C(13503953896175478587), - UINT64_C(4354685564936845355), UINT64_C(11912009170470909681), - UINT64_C(5840696475078001361), UINT64_C(11170449401992604703), - UINT64_C(2270897969802886507), UINT64_C(6620516959819538809) - }, - .t = {UINT64_C(0), UINT64_C(0)}, - .f = {UINT64_C(0), UINT64_C(0)}, - .buf = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }, - .buflen = 0, - .outlen = 64, - .last_node = 0 -}; - - -int blake2b_init_param(blake2b_state *S, const blake2b_param *P) { - const unsigned char *p = (const unsigned char *)P; - unsigned int i; - - if (NULL == P || NULL == S) { - return -1; - } - - blake2b_init0(S); - - /* IV XOR Parameter Block */ - for (i = 0; i < 8; ++i) { - S->h[i] ^= load64(&p[i * sizeof(S->h[i])]); - } - S->outlen = P->digest_length; - return 0; -} - -void compare_buffs(uint64_t *h, size_t outlen) -{ - // printf("CMP : %d", memcmp(h, miou.h, 8*(sizeof(uint64_t)))); - printf("miou : %" PRIu64 " - h : %" PRIu64 " - outlen : %ld\n", miou.h[0], h[0], outlen); - fflush(stdout); -} - -/* Sequential blake2b initialization */ -int blake2b_init(blake2b_state *S, size_t outlen) { - memcpy(S, &miou, sizeof(*S)); - S->h[0] += outlen; - return 0; -} - - -void print64(const char *name, const 
uint64_t *array, uint16_t size) { - printf("%s = {", name); - for (uint8_t i = 0; i < size; i++) printf("UINT64_C(%" PRIu64 "), ", array[i]); - printf("};\n"); -} -int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, - size_t keylen) { - return 0; -} - -static void blake2b_compress(blake2b_state *S, const uint8_t *block) { - uint64_t m[16]; - uint64_t v[16]; - unsigned int i, r; - - for (i = 0; i < 16; ++i) { - m[i] = load64(block + i * 8); - } - - for (i = 0; i < 8; ++i) { - v[i] = S->h[i]; - } - - v[8] = blake2b_IV[0]; - v[9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - v[12] = blake2b_IV[4] ^ S->t[0]; - v[13] = blake2b_IV[5]/* ^ S->t[1]*/; - v[14] = blake2b_IV[6] ^ S->f[0]; - v[15] = blake2b_IV[7]/* ^ S->f[1]*/; - -#define G(r, i, a, b, c, d) \ - do { \ - a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ - d = rotr64(d ^ a, 32); \ - c = c + d; \ - b = rotr64(b ^ c, 24); \ - a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ - d = rotr64(d ^ a, 16); \ - c = c + d; \ - b = rotr64(b ^ c, 63); \ - } while ((void)0, 0) - -#define ROUND(r) \ - do { \ - G(r, 0, v[0], v[4], v[8], v[12]); \ - G(r, 1, v[1], v[5], v[9], v[13]); \ - G(r, 2, v[2], v[6], v[10], v[14]); \ - G(r, 3, v[3], v[7], v[11], v[15]); \ - G(r, 4, v[0], v[5], v[10], v[15]); \ - G(r, 5, v[1], v[6], v[11], v[12]); \ - G(r, 6, v[2], v[7], v[8], v[13]); \ - G(r, 7, v[3], v[4], v[9], v[14]); \ - } while ((void)0, 0) - - for (r = 0; r < 12; ++r) ROUND(r); - - for (i = 0; i < 8; ++i) S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; - -#undef G -#undef ROUND -} - -int blake2b_update(blake2b_state *S, const void *in, size_t inlen) { - const uint8_t *pin = (const uint8_t *)in; - /* Complete current block */ - memcpy(&S->buf[4], pin, 124); - blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); - blake2b_compress(S, S->buf); - S->buflen = 0; - pin += 124; - - register int8_t i = 7; - /* Avoid buffer copies when possible */ - while (i--) { - blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); - 
blake2b_compress(S, pin); - pin += BLAKE2B_BLOCKBYTES; - } - memcpy(&S->buf[S->buflen], pin, 4); - S->buflen += 4; - return 0; -} - -void my_blake2b_update(blake2b_state *S, const void *in, size_t inlen) { - - memcpy(&S->buf[S->buflen], in, inlen); - S->buflen += (unsigned int)inlen; -} - -int blake2b_final(blake2b_state *S, void *out, size_t outlen) { - uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; - unsigned int i; - - blake2b_increment_counter(S, S->buflen); - blake2b_set_lastblock(S); - memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ - blake2b_compress(S, S->buf); - - for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ - store64(buffer + sizeof(S->h[i]) * i, S->h[i]); - } - - memcpy(out, buffer, S->outlen); - - burn(buffer, sizeof(buffer)); - burn(S->buf, sizeof(S->buf)); - burn(S->h, sizeof(S->h)); - return 0; -} - -int blake2b(void *out, const void *in, const void *key, size_t keylen) -{ - blake2b_state S; - - blake2b_init(&S, 64); - my_blake2b_update(&S, in, 64); - blake2b_final(&S, out, 64); - burn(&S, sizeof(S)); - return 0; -} - -void blake2b_too(void *pout, const void *in) -{ - uint8_t *out = (uint8_t *)pout; - uint8_t out_buffer[64]; - uint8_t in_buffer[64]; - - blake2b_state blake_state; - blake2b_init(&blake_state, 64); - blake_state.buflen = blake_state.buf[1] = 4; - my_blake2b_update(&blake_state, in, 72); - blake2b_final(&blake_state, out_buffer, 64); - memcpy(out, out_buffer, 32); - out += 32; - - register uint8_t i = 29; - while (i--) { - memcpy(in_buffer, out_buffer, 64); - blake2b(out_buffer, in_buffer, NULL, 0); - memcpy(out, out_buffer, 32); - out += 32; - } - - memcpy(in_buffer, out_buffer, 64); - blake2b(out_buffer, in_buffer, NULL, 0); - memcpy(out, out_buffer, 64); - - burn(&blake_state, sizeof(blake_state)); -} - -/* Argon2 Team - Begin Code */ -int blake2b_long(void *pout, const void *in) -{ - uint8_t *out = (uint8_t *)pout; - blake2b_state blake_state; - uint8_t outlen_bytes[sizeof(uint32_t)] = {0}; - 
- store32(outlen_bytes, 32); - - blake2b_init(&blake_state, 32); - my_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)); - blake2b_update(&blake_state, in, 1024); - blake2b_final(&blake_state, out, 32); - burn(&blake_state, sizeof(blake_state)); - return 0; - -} -/* Argon2 Team - End Code */ diff --git a/stratum/algos/ar2/core.c b/stratum/algos/ar2/core.c new file mode 100644 index 000000000..9256ce507 --- /dev/null +++ b/stratum/algos/ar2/core.c @@ -0,0 +1,615 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. 
+ */ + +/*For memory wiping*/ +#ifdef _MSC_VER +#include +#include /* For SecureZeroMemory */ +#endif +#if defined __STDC_LIB_EXT1__ +#define __STDC_WANT_LIB_EXT1__ 1 +#endif +#define VC_GE_2005(version) (version >= 1400) + +#include +#include +#include + +#include "core.h" +#include "thread.h" +#include "../blake2/blake2.h" +#include "../blake2/blake2-impl.h" + +#if defined(__clang__) +#if __has_attribute(optnone) +#define NOT_OPTIMIZED __attribute__((optnone)) +#endif +#elif defined(__GNUC__) +#define GCC_VERSION \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#if GCC_VERSION >= 40400 +#define NOT_OPTIMIZED __attribute__((optimize("O0"))) +#endif +#endif +#ifndef NOT_OPTIMIZED +#define NOT_OPTIMIZED +#endif + +/***************Instance and Position constructors**********/ +void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); } + +void copy_block(block *dst, const block *src) { + memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK); +} + +void xor_block(block *dst, const block *src) { + int i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->v[i] ^= src->v[i]; + } +} + +static void load_block(block *dst, const void *input) { + unsigned i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i])); + } +} + +static void store_block(void *output, const block *src) { + unsigned i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]); + } +} + +/***************Memory functions*****************/ + +int allocate_memory(const argon2_context *context, uint8_t **memory, + size_t num, size_t size) { + size_t memory_size = num*size; + if (memory == NULL) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + /* 1. Check for multiplication overflow */ + if (size != 0 && memory_size / size != num) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + /* 2. 
Try to allocate with appropriate allocator */ + if (context->allocate_cbk) { + (context->allocate_cbk)(memory, memory_size); + } else { + *memory = malloc(memory_size); + } + + if (*memory == NULL) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + return ARGON2_OK; +} + +void free_memory(const argon2_context *context, uint8_t *memory, + size_t num, size_t size) { + size_t memory_size = num*size; + clear_internal_memory(memory, memory_size); + if (context->free_cbk) { + (context->free_cbk)(memory, memory_size); + } else { + free(memory); + } +} + +void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) { +#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER) + SecureZeroMemory(v, n); +#elif defined memset_s + memset_s(v, n, 0, n); +#elif defined(__OpenBSD__) + explicit_bzero(v, n); +#else + static void *(*const volatile memset_sec)(void *, int, size_t) = &memset; + memset_sec(v, 0, n); +#endif +} + +/* Memory clear flag defaults to true. */ +int FLAG_clear_internal_memory = 1; +void clear_internal_memory(void *v, size_t n) { + if (FLAG_clear_internal_memory && v) { + secure_wipe_memory(v, n); + } +} + +void finalize(const argon2_context *context, argon2_instance_t *instance) { + if (context != NULL && instance != NULL) { + block blockhash; + uint32_t l; + + copy_block(&blockhash, instance->memory + instance->lane_length - 1); + + /* XOR the last blocks */ + for (l = 1; l < instance->lanes; ++l) { + uint32_t last_block_in_lane = + l * instance->lane_length + (instance->lane_length - 1); + xor_block(&blockhash, instance->memory + last_block_in_lane); + } + + /* Hash the result */ + { + uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; + store_block(blockhash_bytes, &blockhash); + blake2b_long(context->out, context->outlen, blockhash_bytes, + ARGON2_BLOCK_SIZE); + /* clear blockhash and blockhash_bytes */ + clear_internal_memory(blockhash.v, ARGON2_BLOCK_SIZE); + clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); + } + + free_memory(context, (uint8_t *)instance->memory, + 
instance->memory_blocks, sizeof(block)); + } +} + +uint32_t index_alpha(const argon2_instance_t *instance, + const argon2_position_t *position, uint32_t pseudo_rand, + int same_lane) { + /* + * Pass 0: + * This lane : all already finished segments plus already constructed + * blocks in this segment + * Other lanes : all already finished segments + * Pass 1+: + * This lane : (SYNC_POINTS - 1) last segments plus already constructed + * blocks in this segment + * Other lanes : (SYNC_POINTS - 1) last segments + */ + uint32_t reference_area_size; + uint64_t relative_position; + uint32_t start_position, absolute_position; + + if (0 == position->pass) { + /* First pass */ + if (0 == position->slice) { + /* First slice */ + reference_area_size = + position->index - 1; /* all but the previous */ + } else { + if (same_lane) { + /* The same lane => add current segment */ + reference_area_size = + position->slice * instance->segment_length + + position->index - 1; + } else { + reference_area_size = + position->slice * instance->segment_length + + ((position->index == 0) ? (-1) : 0); + } + } + } else { + /* Second and later passes */ + if (same_lane) { + reference_area_size = instance->lane_length - + instance->segment_length + position->index - + 1; + } else { + reference_area_size = instance->lane_length - + instance->segment_length + + ((position->index == 0) ? (-1) : 0); + } + } + + /* 1.2.4. Mapping pseudo_rand to 0..(reference_area_size - 1) and produce + * relative position */ + relative_position = pseudo_rand; + relative_position = relative_position * relative_position >> 32; + relative_position = reference_area_size - 1 - + (reference_area_size * relative_position >> 32); + + /* 1.2.5 Computing starting position */ + start_position = 0; + + if (0 != position->pass) { + start_position = (position->slice == ARGON2_SYNC_POINTS - 1) + ? 0 + : (position->slice + 1) * instance->segment_length; + } + + /* 1.2.6. 
Computing absolute position */ + absolute_position = (start_position + relative_position) % + instance->lane_length; /* absolute position */ + return absolute_position; +} + +/* Single-threaded version for p=1 case */ +static int fill_memory_blocks_st(argon2_instance_t *instance) { + uint32_t r, s, l; + + for (r = 0; r < instance->passes; ++r) { + for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { + for (l = 0; l < instance->lanes; ++l) { + argon2_position_t position = {r, l, (uint8_t)s, 0}; + fill_segment(instance, position); + } + } + } + return ARGON2_OK; +} + +#if !defined(ARGON2_NO_THREADS) + +#ifdef _WIN32 +static unsigned __stdcall fill_segment_thr(void *thread_data) +#else +static void *fill_segment_thr(void *thread_data) +#endif +{ + argon2_thread_data *my_data = thread_data; + fill_segment(my_data->instance_ptr, my_data->pos); + argon2_thread_exit(); + return 0; +} + +/* Multi-threaded version for p > 1 case */ +static int fill_memory_blocks_mt(argon2_instance_t *instance) { + uint32_t r, s; + argon2_thread_handle_t *thread = NULL; + argon2_thread_data *thr_data = NULL; + int rc = ARGON2_OK; + + /* 1. Allocating space for threads */ + thread = calloc(instance->lanes, sizeof(argon2_thread_handle_t)); + if (thread == NULL) { + rc = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + thr_data = calloc(instance->lanes, sizeof(argon2_thread_data)); + if (thr_data == NULL) { + rc = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + for (r = 0; r < instance->passes; ++r) { + for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { + uint32_t l; + + /* 2. 
Calling threads */ + for (l = 0; l < instance->lanes; ++l) { + argon2_position_t position; + + /* 2.1 Join a thread if limit is exceeded */ + if (l >= instance->threads) { + if (argon2_thread_join(thread[l - instance->threads])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + } + + /* 2.2 Create thread */ + position.pass = r; + position.lane = l; + position.slice = (uint8_t)s; + position.index = 0; + thr_data[l].instance_ptr = + instance; /* preparing the thread input */ + memcpy(&(thr_data[l].pos), &position, + sizeof(argon2_position_t)); + if (argon2_thread_create(&thread[l], &fill_segment_thr, + (void *)&thr_data[l])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + + /* fill_segment(instance, position); */ + /*Non-thread equivalent of the lines above */ + } + + /* 3. Joining remaining threads */ + for (l = instance->lanes - instance->threads; l < instance->lanes; + ++l) { + if (argon2_thread_join(thread[l])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + } + } + } + +fail: + if (thread != NULL) { + free(thread); + } + if (thr_data != NULL) { + free(thr_data); + } + return rc; +} + +#endif /* ARGON2_NO_THREADS */ + +int fill_memory_blocks(argon2_instance_t *instance) { + if (instance == NULL || instance->lanes == 0) { + return ARGON2_INCORRECT_PARAMETER; + } +#if defined(ARGON2_NO_THREADS) + return fill_memory_blocks_st(instance); +#else + return instance->threads == 1 ? 
+ fill_memory_blocks_st(instance) : fill_memory_blocks_mt(instance); +#endif +} + +int validate_inputs(const argon2_context *context) { + if (NULL == context) { + return ARGON2_INCORRECT_PARAMETER; + } + + if (NULL == context->out) { + return ARGON2_OUTPUT_PTR_NULL; + } + + /* Validate output length */ + if (ARGON2_MIN_OUTLEN > context->outlen) { + return ARGON2_OUTPUT_TOO_SHORT; + } + + if (ARGON2_MAX_OUTLEN < context->outlen) { + return ARGON2_OUTPUT_TOO_LONG; + } + + /* Validate password (required param) */ + if (NULL == context->pwd) { + if (0 != context->pwdlen) { + return ARGON2_PWD_PTR_MISMATCH; + } + } + + if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) { + return ARGON2_PWD_TOO_SHORT; + } + + if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) { + return ARGON2_PWD_TOO_LONG; + } + + /* Validate salt (required param) */ + if (NULL == context->salt) { + if (0 != context->saltlen) { + return ARGON2_SALT_PTR_MISMATCH; + } + } + + if (ARGON2_MIN_SALT_LENGTH > context->saltlen) { + return ARGON2_SALT_TOO_SHORT; + } + + if (ARGON2_MAX_SALT_LENGTH < context->saltlen) { + return ARGON2_SALT_TOO_LONG; + } + + /* Validate secret (optional param) */ + if (NULL == context->secret) { + if (0 != context->secretlen) { + return ARGON2_SECRET_PTR_MISMATCH; + } + } else { + if (ARGON2_MIN_SECRET > context->secretlen) { + return ARGON2_SECRET_TOO_SHORT; + } + if (ARGON2_MAX_SECRET < context->secretlen) { + return ARGON2_SECRET_TOO_LONG; + } + } + + /* Validate associated data (optional param) */ + if (NULL == context->ad) { + if (0 != context->adlen) { + return ARGON2_AD_PTR_MISMATCH; + } + } else { + if (ARGON2_MIN_AD_LENGTH > context->adlen) { + return ARGON2_AD_TOO_SHORT; + } + if (ARGON2_MAX_AD_LENGTH < context->adlen) { + return ARGON2_AD_TOO_LONG; + } + } + + /* Validate memory cost */ + if (ARGON2_MIN_MEMORY > context->m_cost) { + return ARGON2_MEMORY_TOO_LITTLE; + } + + if (ARGON2_MAX_MEMORY < context->m_cost) { + return ARGON2_MEMORY_TOO_MUCH; + } + + if (context->m_cost < 8 * 
context->lanes) { + return ARGON2_MEMORY_TOO_LITTLE; + } + + /* Validate time cost */ + if (ARGON2_MIN_TIME > context->t_cost) { + return ARGON2_TIME_TOO_SMALL; + } + + if (ARGON2_MAX_TIME < context->t_cost) { + return ARGON2_TIME_TOO_LARGE; + } + + /* Validate lanes */ + if (ARGON2_MIN_LANES > context->lanes) { + return ARGON2_LANES_TOO_FEW; + } + + if (ARGON2_MAX_LANES < context->lanes) { + return ARGON2_LANES_TOO_MANY; + } + + /* Validate threads */ + if (ARGON2_MIN_THREADS > context->threads) { + return ARGON2_THREADS_TOO_FEW; + } + + if (ARGON2_MAX_THREADS < context->threads) { + return ARGON2_THREADS_TOO_MANY; + } + + if (NULL != context->allocate_cbk && NULL == context->free_cbk) { + return ARGON2_FREE_MEMORY_CBK_NULL; + } + + if (NULL == context->allocate_cbk && NULL != context->free_cbk) { + return ARGON2_ALLOCATE_MEMORY_CBK_NULL; + } + + return ARGON2_OK; +} + +void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) { + uint32_t l; + /* Make the first and second block in each lane as G(H0||0||i) or + G(H0||1||i) */ + uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; + for (l = 0; l < instance->lanes; ++l) { + + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); + blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + load_block(&instance->memory[l * instance->lane_length + 0], + blockhash_bytes); + + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); + blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + load_block(&instance->memory[l * instance->lane_length + 1], + blockhash_bytes); + } + clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); +} + +void initial_hash(uint8_t *blockhash, argon2_context *context, + argon2_type type) { + blake2b_state BlakeHash; + uint8_t value[sizeof(uint32_t)]; + + if (NULL == context || NULL == blockhash) { + return; + } + + blake2b_init(&BlakeHash, 
ARGON2_PREHASH_DIGEST_LENGTH); + + store32(&value, context->lanes); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->outlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->m_cost); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->t_cost); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, ARGON2_VERSION_NUMBER); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, (uint32_t)type); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->pwdlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->pwd != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->pwd, + context->pwdlen); + + if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { + secure_wipe_memory(context->pwd, context->pwdlen); + context->pwdlen = 0; + } + } + + store32(&value, context->saltlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->salt != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->salt, + context->saltlen); + } + + store32(&value, context->secretlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->secret != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->secret, + context->secretlen); + + if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { + secure_wipe_memory(context->secret, context->secretlen); + context->secretlen = 0; + } + } + + store32(&value, context->adlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->ad != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->ad, + context->adlen); + } + + blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); +} + +int initialize(argon2_instance_t *instance, 
argon2_context *context) { + uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; + int result = ARGON2_OK; + + if (instance == NULL || context == NULL) + return ARGON2_INCORRECT_PARAMETER; + instance->context_ptr = context; + + /* 1. Memory allocation */ + result = allocate_memory(context, (uint8_t **)&(instance->memory), + instance->memory_blocks, sizeof(block)); + if (result != ARGON2_OK) { + return result; + } + + /* 2. Initial hashing */ + /* H_0 + 8 extra bytes to produce the first blocks */ + /* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */ + /* Hashing all inputs */ + initial_hash(blockhash, context, instance->type); + /* Zeroing 8 extra bytes */ + clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, + ARGON2_PREHASH_SEED_LENGTH - + ARGON2_PREHASH_DIGEST_LENGTH); + + /* 3. Creating first blocks, we always have at least two blocks in a slice + */ + fill_first_blocks(blockhash, instance); + /* Clearing the hash */ + clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH); + + return ARGON2_OK; +} \ No newline at end of file diff --git a/stratum/algos/ar2/cores.h b/stratum/algos/ar2/core.h similarity index 62% rename from stratum/algos/ar2/cores.h rename to stratum/algos/ar2/core.h index 8cf5dda9e..be5a787ab 100644 --- a/stratum/algos/ar2/cores.h +++ b/stratum/algos/ar2/core.h @@ -1,39 +1,39 @@ /* - * Argon2 source code package + * Argon2 reference source code package - reference C implementations * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . 
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. */ -#ifndef ARGON2_CORES_H -#define ARGON2_CORES_H +#ifndef ARGON2_CORE_H +#define ARGON2_CORE_H + +#include "argon2.h" -#if defined(_MSC_VER) -#define ALIGN(n) __declspec(align(16)) -#elif defined(__GNUC__) || defined(__clang) -#define ALIGN(x) __attribute__((__aligned__(x))) -#else -#define ALIGN(x) -#endif +#define CONST_CAST(x) (x)(uintptr_t) -/*************************Argon2 internal - * constants**************************************************/ +/**********************Argon2 internal constants*******************************/ enum argon2_core_constants { - /* Version of the algorithm */ + /* Version of the algorithm */ ARGON2_VERSION_NUMBER = 0x10, - /* Memory block size in bytes */ ARGON2_BLOCK_SIZE = 1024, - ARGON2_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8, - ARGON2_QWORDS_IN_BLOCK = 64, - + ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8, + ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16, + ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32, + ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64, + /* Number of pseudo-random values generated by one call to Blake in Argon2i to generate reference block positions */ @@ -44,18 +44,14 @@ enum argon2_core_constants { ARGON2_PREHASH_SEED_LENGTH = 72 }; -/* Argon2 primitive type */ -typedef enum Argon2_type { Argon2_d = 0, Argon2_i = 1 } argon2_type; - -/*************************Argon2 internal data - * types**************************************************/ +/*************************Argon2 internal data types***********************/ /* * Structure for the (1KB) memory block implemented as 128 64-bit words. * Memory blocks can be copied, XORed. Internal words can be accessed by [] (no * bounds checking). 
*/ -typedef struct _block { uint64_t v[ARGON2_WORDS_IN_BLOCK]; } __attribute__ ((aligned (16))) block; +typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; /*****************Functions that work with the block******************/ @@ -76,8 +72,16 @@ void xor_block(block *dst, const block *src); */ typedef struct Argon2_instance_t { block *memory; /* Memory pointer */ + uint32_t version; + uint32_t passes; /* Number of passes */ + uint32_t memory_blocks; /* Number of blocks in memory */ + uint32_t segment_length; + uint32_t lane_length; + uint32_t lanes; + uint32_t threads; argon2_type type; int print_internals; /* whether to print the memory blocks */ + argon2_context *context_ptr; /* points back to original context */ } argon2_instance_t; /* @@ -97,32 +101,43 @@ typedef struct Argon2_thread_data { argon2_position_t pos; } argon2_thread_data; -/*************************Argon2 core - * functions**************************************************/ +/*************************Argon2 core functions********************************/ -/* Allocates memory to the given pointer +/* Allocates memory to the given pointer, uses the appropriate allocator as + * specified in the context. Total allocated memory is num*size. + * @param context argon2_context which specifies the allocator * @param memory pointer to the pointer to the memory - * @param m_cost number of blocks to allocate in the memory + * @param size the size in bytes for each element to be allocated + * @param num the number of elements to be allocated * @return ARGON2_OK if @memory is a valid pointer and memory is allocated */ -int allocate_memory(block **memory, uint32_t m_cost); +int allocate_memory(const argon2_context *context, uint8_t **memory, + size_t num, size_t size); -/* Function that securely cleans the memory +/* + * Frees memory at the given pointer, uses the appropriate deallocator as + * specified in the context. Also cleans the memory using clear_internal_memory. 
+ * @param context argon2_context which specifies the deallocator + * @param memory pointer to buffer to be freed + * @param size the size in bytes for each element to be deallocated + * @param num the number of elements to be deallocated + */ +void free_memory(const argon2_context *context, uint8_t *memory, + size_t num, size_t size); + +/* Function that securely cleans the memory. This ignores any flags set + * regarding clearing memory. Usually one just calls clear_internal_memory. * @param mem Pointer to the memory * @param s Memory size in bytes */ void secure_wipe_memory(void *v, size_t n); -/* Clears memory - * @param instance pointer to the current instance - * @param clear_memory indicates if we clear the memory with zeros. - */ -void clear_memory(argon2_instance_t *instance, int clear); - -/* Deallocates memory - * @param memory pointer to the blocks +/* Function that securely clears the memory if FLAG_clear_internal_memory is + * set. If the flag isn't set, this function does nothing. 
+ * @param mem Pointer to the memory + * @param s Memory size in bytes */ -void free_memory(block *memory); +void clear_internal_memory(void *v, size_t n); /* * Computes absolute position of reference block in the lane following a skewed @@ -166,7 +181,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context, * @param blockhash Pointer to the pre-hashing digest * @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values */ -void fill_firsts_blocks(uint8_t *blockhash, const argon2_instance_t *instance); +void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance); /* * Function allocates memory, hashes the inputs with Blake, and creates first @@ -196,6 +211,7 @@ void finalize(const argon2_context *context, argon2_instance_t *instance); /* * Function that fills the segment using previous segments also from other * threads + * @param context current context * @param instance Pointer to the current instance * @param position Current position * @pre all block pointers must be valid @@ -207,14 +223,8 @@ void fill_segment(const argon2_instance_t *instance, * Function that fills the entire memory t_cost times based on the first two * blocks in each lane * @param instance Pointer to the current instance + * @return ARGON2_OK if successful, @context->state */ -void fill_memory_blocks(argon2_instance_t *instance); - -/* - * Function that performs memory-hard hashing with certain degree of parallelism - * @param context Pointer to the Argon2 internal structure - * @return Error code if smth is wrong, ARGON2_OK otherwise - */ -int argon2_core(argon2_context *context, argon2_type type); +int fill_memory_blocks(argon2_instance_t *instance); -#endif +#endif \ No newline at end of file diff --git a/stratum/algos/ar2/cores.c b/stratum/algos/ar2/cores.c deleted file mode 100644 index f4b287825..000000000 --- a/stratum/algos/ar2/cores.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry 
Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -/*For memory wiping*/ -#ifdef _MSC_VER -#include -#include /* For SecureZeroMemory */ -#endif -#if defined __STDC_LIB_EXT1__ -#define __STDC_WANT_LIB_EXT1__ 1 -#endif -#define VC_GE_2005(version) (version >= 1400) - -#include -#include -#include -#include - -#include "argon2.h" -#include "cores.h" -#include "blake2/blake2.h" -#include "blake2/blake2-impl.h" - -#ifdef GENKAT -#include "genkat.h" -#endif - -#if defined(__clang__) -#if __has_attribute(optnone) -#define NOT_OPTIMIZED __attribute__((optnone)) -#endif -#elif defined(__GNUC__) -#define GCC_VERSION \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#if GCC_VERSION >= 40400 -#define NOT_OPTIMIZED __attribute__((optimize("O0"))) -#endif -#endif -#ifndef NOT_OPTIMIZED -#define NOT_OPTIMIZED -#endif - -/***************Instance and Position constructors**********/ -void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); } - -void copy_block(block *dst, const block *src) { - memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_WORDS_IN_BLOCK); -} - -void xor_block(block *dst, const block *src) { - int i; - for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) { - dst->v[i] ^= src->v[i]; - } -} - -static void load_block(block *dst, const void *input) { - unsigned i; - for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) { - dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i])); - } -} - -static void store_block(void *output, const block *src) { - unsigned i; - for (i = 0; i < ARGON2_WORDS_IN_BLOCK; ++i) { - store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]); - } -} - -/***************Memory allocators*****************/ -int allocate_memory(block **memory, uint32_t m_cost) { - if (memory != NULL) { - size_t memory_size = sizeof(block) * m_cost; 
- if (m_cost != 0 && - memory_size / m_cost != - sizeof(block)) { /*1. Check for multiplication overflow*/ - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - *memory = (block *)malloc(memory_size); /*2. Try to allocate*/ - - if (!*memory) { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } - - return ARGON2_OK; - } else { - return ARGON2_MEMORY_ALLOCATION_ERROR; - } -} - -void secure_wipe_memory(void *v, size_t n) { memset(v, 0, n); } - -/*********Memory functions*/ - -void clear_memory(argon2_instance_t *instance, int clear) { - if (instance->memory != NULL && clear) { - secure_wipe_memory(instance->memory, - sizeof(block) * /*instance->memory_blocks*/16); - } -} - -void free_memory(block *memory) { free(memory); } - -void finalize(const argon2_context *context, argon2_instance_t *instance) { - if (context != NULL && instance != NULL) { - block blockhash; - copy_block(&blockhash, instance->memory + 15); - - /* Hash the result */ - { - uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; - store_block(blockhash_bytes, &blockhash); - blake2b_long(context->out, blockhash_bytes); - secure_wipe_memory(blockhash.v, ARGON2_BLOCK_SIZE); - secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); /* clear blockhash_bytes */ - } - -#ifdef GENKAT - print_tag(context->out, context->outlen); -#endif - - /* Clear memory */ - clear_memory(instance, 1); - - free_memory(instance->memory); - } -} - -uint32_t index_alpha(const argon2_instance_t *instance, - const argon2_position_t *position, uint32_t pseudo_rand, - int same_lane) { - /* - * Pass 0: - * This lane : all already finished segments plus already constructed - * blocks in this segment - * Other lanes : all already finished segments - * Pass 1+: - * This lane : (SYNC_POINTS - 1) last segments plus already constructed - * blocks in this segment - * Other lanes : (SYNC_POINTS - 1) last segments - */ - uint32_t reference_area_size; - uint64_t relative_position; - uint32_t start_position, absolute_position; - - if (0 == position->pass) { - /* 
First pass */ - if (0 == position->slice) { - /* First slice */ - reference_area_size = - position->index - 1; /* all but the previous */ - } else { - if (same_lane) { - /* The same lane => add current segment */ - reference_area_size = - position->slice * 4 + - position->index - 1; - } else { - reference_area_size = - position->slice * 4 + - ((position->index == 0) ? (-1) : 0); - } - } - } else { - /* Second pass */ - if (same_lane) {reference_area_size = 11 + position->index;} - else {reference_area_size = 12 - (position->index == 0);} - } - - /* 1.2.4. Mapping pseudo_rand to 0.. and produce - * relative position */ - relative_position = pseudo_rand; - relative_position = relative_position * relative_position >> 32; - relative_position = reference_area_size - 1 - - (reference_area_size * relative_position >> 32); - - /* 1.2.5 Computing starting position */ - start_position = 0; - - if (0 != position->pass) { - start_position = (position->slice == ARGON2_SYNC_POINTS - 1) - ? 0 : (position->slice + 1) * 4; - } - - /* 1.2.6. 
Computing absolute position */ - absolute_position = (start_position + relative_position) % 16; - return absolute_position; -} - -void fill_memory_blocks(argon2_instance_t *instance) { - uint32_t r, s; - - for (r = 0; r < 2; ++r) { - for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { - - argon2_position_t position; - position.pass = r; - position.lane = 0; - position.slice = (uint8_t)s; - position.index = 0; - fill_segment(instance, position); - } - -#ifdef GENKAT - internal_kat(instance, r); /* Print all memory blocks */ -#endif - } -} - -void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) { - /* Make the first and second block in each lane as G(H0||i||0) or - G(H0||i||1) */ - uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, 0); - blake2b_too(blockhash_bytes, blockhash); - load_block(&instance->memory[0], blockhash_bytes); - - store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); - blake2b_too(blockhash_bytes, blockhash); - load_block(&instance->memory[1], blockhash_bytes); - secure_wipe_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); -} - - -static const blake2b_state base_hash = { - .h = { - UINT64_C(7640891576939301192), UINT64_C(13503953896175478587), - UINT64_C(4354685564936845355), UINT64_C(11912009170470909681), - UINT64_C(5840696475078001361), UINT64_C(11170449401992604703), - UINT64_C(2270897969802886507), UINT64_C(6620516959819538809) - }, - .t = {UINT64_C(0),UINT64_C(0)}, - .f = {UINT64_C(0),UINT64_C(0)}, - .buf = { - 1, 0, 0, 0, 32, 0, 0, 0, 16, 0, 0, 0, 2, 0, 0, 0, 16, 0, 0, 0, 1, 0, - 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - .buflen = 28, - .outlen 
= 64, - .last_node = 0 -}; - -#define PWDLEN 32 -#define SALTLEN 32 -#define SECRETLEN 0 -#define ADLEN 0 -void initial_hash(uint8_t *blockhash, argon2_context *context, - argon2_type type) { - - uint8_t value[sizeof(uint32_t)]; - - /* Is it generating cache invalidation between cores ? */ - blake2b_state BlakeHash = base_hash; - BlakeHash.buf[20] = (uint8_t) type; - my_blake2b_update(&BlakeHash, (const uint8_t *)context->pwd, - PWDLEN); - - - secure_wipe_memory(context->pwd, PWDLEN); - context->pwdlen = 0; - - store32(&value, SALTLEN); - my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - my_blake2b_update(&BlakeHash, (const uint8_t *)context->salt, - SALTLEN); - - store32(&value, SECRETLEN); - my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - store32(&value, ADLEN); - my_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); - - blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); -} - -int initialize(argon2_instance_t *instance, argon2_context *context) { - /* 1. Memory allocation */ - - - allocate_memory(&(instance->memory), 16); - - /* 2. Initial hashing */ - /* H_0 + 8 extra bytes to produce the first blocks */ - /* Hashing all inputs */ - uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; - initial_hash(blockhash, context, instance->type); - /* Zeroing 8 extra bytes */ - secure_wipe_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, - ARGON2_PREHASH_SEED_LENGTH - - ARGON2_PREHASH_DIGEST_LENGTH); - -#ifdef GENKAT - initial_kat(blockhash, context, instance->type); -#endif - - /* 3. Creating first blocks, we always have at least two blocks in a slice - */ - fill_first_blocks(blockhash, instance); - /* Clearing the hash */ - secure_wipe_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH); - - return ARGON2_OK; -} - -int argon2_core(argon2_context *context, argon2_type type) { - argon2_instance_t instance; - instance.memory = NULL; - instance.type = type; - - /* 3. 
Initialization: Hashing inputs, allocating memory, filling first - * blocks - */ - - int result = initialize(&instance, context); - if (ARGON2_OK != result) return result; - - /* 4. Filling memory */ - fill_memory_blocks(&instance); - - /* 5. Finalization */ - finalize(context, &instance); - - return ARGON2_OK; -} diff --git a/stratum/algos/ar2/encoding.c b/stratum/algos/ar2/encoding.c new file mode 100644 index 000000000..fae47f734 --- /dev/null +++ b/stratum/algos/ar2/encoding.c @@ -0,0 +1,455 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#include +#include +#include +#include +#include "encoding.h" +#include "core.h" + +/* + * Example code for a decoder and encoder of "hash strings", with Argon2 + * parameters. + * + * This code comprises three sections: + * + * -- The first section contains generic Base64 encoding and decoding + * functions. It is conceptually applicable to any hash function + * implementation that uses Base64 to encode and decode parameters, + * salts and outputs. It could be made into a library, provided that + * the relevant functions are made public (non-static) and be given + * reasonable names to avoid collisions with other functions. + * + * -- The second section is specific to Argon2. It encodes and decodes + * the parameters, salts and outputs. It does not compute the hash + * itself. 
+ * + * The code was originally written by Thomas Pornin <pornin@bolet.org>, + * to whom comments and remarks may be sent. It is released under what + * should amount to Public Domain or its closest equivalent; the + * following mantra is supposed to incarnate that fact with all the + * proper legal rituals: + * + * --------------------------------------------------------------------- + * This file is provided under the terms of Creative Commons CC0 1.0 + * Public Domain Dedication. To the extent possible under law, the + * author (Thomas Pornin) has waived all copyright and related or + * neighboring rights to this file. This work is published from: Canada. + * --------------------------------------------------------------------- + * + * Copyright (c) 2015 Thomas Pornin + */ + +/* ==================================================================== */ +/* + * Common code; could be shared between different hash functions. + * + * Note: the Base64 functions below assume that uppercase letters (resp. + * lowercase letters) have consecutive numerical codes, that fit on 8 + * bits. All modern systems use ASCII-compatible charsets, where these + * properties are true. If you are stuck with a dinosaur of a system + * that still defaults to EBCDIC then you already have much bigger + * interoperability issues to deal with. + */ + +/* + * Some macros for constant-time comparisons. These work over values in + * the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true". + */ +#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF) +#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF) +#define GE(x, y) (GT(y, x) ^ 0xFF) +#define LT(x, y) GT(y, x) +#define LE(x, y) GE(y, x) + +/* + * Convert value x (0..63) to corresponding Base64 character. 
+ */ +static int b64_byte_to_char(unsigned x) { + return (LT(x, 26) & (x + 'A')) | + (GE(x, 26) & LT(x, 52) & (x + ('a' - 26))) | + (GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+') | + (EQ(x, 63) & '/'); +} + +/* + * Convert character c to the corresponding 6-bit value. If character c + * is not a Base64 character, then 0xFF (255) is returned. + */ +static unsigned b64_char_to_byte(int c) { + unsigned x; + + x = (GE(c, 'A') & LE(c, 'Z') & (c - 'A')) | + (GE(c, 'a') & LE(c, 'z') & (c - ('a' - 26))) | + (GE(c, '0') & LE(c, '9') & (c - ('0' - 52))) | (EQ(c, '+') & 62) | + (EQ(c, '/') & 63); + return x | (EQ(x, 0) & (EQ(c, 'A') ^ 0xFF)); +} + +/* + * Convert some bytes to Base64. 'dst_len' is the length (in characters) + * of the output buffer 'dst'; if that buffer is not large enough to + * receive the result (including the terminating 0), then (size_t)-1 + * is returned. Otherwise, the zero-terminated Base64 string is written + * in the buffer, and the output length (counted WITHOUT the terminating + * zero) is returned. + */ +static size_t to_base64(char *dst, size_t dst_len, const void *src, + size_t src_len) { + size_t olen; + const unsigned char *buf; + unsigned acc, acc_len; + + olen = (src_len / 3) << 2; + switch (src_len % 3) { + case 2: + olen++; + /* fall through */ + case 1: + olen += 2; + break; + } + if (dst_len <= olen) { + return (size_t)-1; + } + acc = 0; + acc_len = 0; + buf = (const unsigned char *)src; + while (src_len-- > 0) { + acc = (acc << 8) + (*buf++); + acc_len += 8; + while (acc_len >= 6) { + acc_len -= 6; + *dst++ = (char)b64_byte_to_char((acc >> acc_len) & 0x3F); + } + } + if (acc_len > 0) { + *dst++ = (char)b64_byte_to_char((acc << (6 - acc_len)) & 0x3F); + } + *dst++ = 0; + return olen; +} + +/* + * Decode Base64 chars into bytes. The '*dst_len' value must initially + * contain the length of the output buffer '*dst'; when the decoding + * ends, the actual number of decoded bytes is written back in + * '*dst_len'. 
+ * + * Decoding stops when a non-Base64 character is encountered, or when + * the output buffer capacity is exceeded. If an error occurred (output + * buffer is too small, invalid last characters leading to unprocessed + * buffered bits), then NULL is returned; otherwise, the returned value + * points to the first non-Base64 character in the source stream, which + * may be the terminating zero. + */ +static const char *from_base64(void *dst, size_t *dst_len, const char *src) { + size_t len; + unsigned char *buf; + unsigned acc, acc_len; + + buf = (unsigned char *)dst; + len = 0; + acc = 0; + acc_len = 0; + for (;;) { + unsigned d; + + d = b64_char_to_byte(*src); + if (d == 0xFF) { + break; + } + src++; + acc = (acc << 6) + d; + acc_len += 6; + if (acc_len >= 8) { + acc_len -= 8; + if ((len++) >= *dst_len) { + return NULL; + } + *buf++ = (acc >> acc_len) & 0xFF; + } + } + + /* + * If the input length is equal to 1 modulo 4 (which is + * invalid), then there will remain 6 unprocessed bits; + * otherwise, only 0, 2 or 4 bits are buffered. The buffered + * bits must also all be zero. + */ + if (acc_len > 4 || (acc & (((unsigned)1 << acc_len) - 1)) != 0) { + return NULL; + } + *dst_len = len; + return src; +} + +/* + * Decode decimal integer from 'str'; the value is written in '*v'. + * Returned value is a pointer to the next non-decimal character in the + * string. If there is no digit at all, or the value encoding is not + * minimal (extra leading zeros), or the value does not fit in an + * 'unsigned long', then NULL is returned. 
+ */ +static const char *decode_decimal(const char *str, unsigned long *v) { + const char *orig; + unsigned long acc; + + acc = 0; + for (orig = str;; str++) { + int c; + + c = *str; + if (c < '0' || c > '9') { + break; + } + c -= '0'; + if (acc > (ULONG_MAX / 10)) { + return NULL; + } + acc *= 10; + if ((unsigned long)c > (ULONG_MAX - acc)) { + return NULL; + } + acc += (unsigned long)c; + } + if (str == orig || (*orig == '0' && str != (orig + 1))) { + return NULL; + } + *v = acc; + return str; +} + +/* ==================================================================== */ +/* + * Code specific to Argon2. + * + * The code below applies the following format: + * + * $argon2<T>[$v=<num>]$m=<num>,t=<num>,p=<num>$<bin>$<bin> + * + * where <T> is either 'd', 'id', or 'i', <num> is a decimal integer (positive, + * fits in an 'unsigned long'), and <bin> is Base64-encoded data (no '=' padding + * characters, no newline or whitespace). + * + * The last two binary chunks (encoded in Base64) are, in that order, + * the salt and the output. Both are required. The binary salt length and the + * output length must be in the allowed ranges defined in argon2.h. + * + * The ctx struct must contain buffers large enough to hold the salt and pwd + * when it is fed into decode_string. 
+ */ + +int decode_string(argon2_context *ctx, const char *str, argon2_type type) { + +/* check for prefix */ +#define CC(prefix) \ + do { \ + size_t cc_len = strlen(prefix); \ + if (strncmp(str, prefix, cc_len) != 0) { \ + return ARGON2_DECODING_FAIL; \ + } \ + str += cc_len; \ + } while ((void)0, 0) + +/* optional prefix checking with supplied code */ +#define CC_opt(prefix, code) \ + do { \ + size_t cc_len = strlen(prefix); \ + if (strncmp(str, prefix, cc_len) == 0) { \ + str += cc_len; \ + { code; } \ + } \ + } while ((void)0, 0) + +/* Decoding prefix into decimal */ +#define DECIMAL(x) \ + do { \ + unsigned long dec_x; \ + str = decode_decimal(str, &dec_x); \ + if (str == NULL) { \ + return ARGON2_DECODING_FAIL; \ + } \ + (x) = dec_x; \ + } while ((void)0, 0) + + +/* Decoding prefix into uint32_t decimal */ +#define DECIMAL_U32(x) \ + do { \ + unsigned long dec_x; \ + str = decode_decimal(str, &dec_x); \ + if (str == NULL || dec_x > UINT32_MAX) { \ + return ARGON2_DECODING_FAIL; \ + } \ + (x) = (uint32_t)dec_x; \ + } while ((void)0, 0) + + +/* Decoding base64 into a binary buffer */ +#define BIN(buf, max_len, len) \ + do { \ + size_t bin_len = (max_len); \ + str = from_base64(buf, &bin_len, str); \ + if (str == NULL || bin_len > UINT32_MAX) { \ + return ARGON2_DECODING_FAIL; \ + } \ + (len) = (uint32_t)bin_len; \ + } while ((void)0, 0) + + size_t maxsaltlen = ctx->saltlen; + size_t maxoutlen = ctx->outlen; + int validation_result; + const char* type_string; + + /* We should start with the argon2_type we are using */ + type_string = argon2_type2string(type, 0); + if (!type_string) { + return ARGON2_INCORRECT_TYPE; + } + + CC("$"); + CC(type_string); + + CC("$m="); + DECIMAL_U32(ctx->m_cost); + CC(",t="); + DECIMAL_U32(ctx->t_cost); + CC(",p="); + DECIMAL_U32(ctx->lanes); + ctx->threads = ctx->lanes; + + CC("$"); + BIN(ctx->salt, maxsaltlen, ctx->saltlen); + CC("$"); + BIN(ctx->out, maxoutlen, ctx->outlen); + + /* The rest of the fields get the default values */ 
+ ctx->secret = NULL; + ctx->secretlen = 0; + ctx->ad = NULL; + ctx->adlen = 0; + ctx->allocate_cbk = NULL; + ctx->free_cbk = NULL; + ctx->flags = ARGON2_DEFAULT_FLAGS; + + /* On return, must have valid context */ + validation_result = validate_inputs(ctx); + if (validation_result != ARGON2_OK) { + return validation_result; + } + + /* Can't have any additional characters */ + if (*str == 0) { + return ARGON2_OK; + } else { + return ARGON2_DECODING_FAIL; + } +#undef CC +#undef CC_opt +#undef DECIMAL +#undef BIN +} + +int encode_string(char *dst, size_t dst_len, argon2_context *ctx, + argon2_type type) { +#define SS(str) \ + do { \ + size_t pp_len = strlen(str); \ + if (pp_len >= dst_len) { \ + return ARGON2_ENCODING_FAIL; \ + } \ + memcpy(dst, str, pp_len + 1); \ + dst += pp_len; \ + dst_len -= pp_len; \ + } while ((void)0, 0) + +#define SX(x) \ + do { \ + char tmp[30]; \ + sprintf(tmp, "%lu", (unsigned long)(x)); \ + SS(tmp); \ + } while ((void)0, 0) + +#define SB(buf, len) \ + do { \ + size_t sb_len = to_base64(dst, dst_len, buf, len); \ + if (sb_len == (size_t)-1) { \ + return ARGON2_ENCODING_FAIL; \ + } \ + dst += sb_len; \ + dst_len -= sb_len; \ + } while ((void)0, 0) + + const char* type_string = argon2_type2string(type, 0); + int validation_result = validate_inputs(ctx); + + if (!type_string) { + return ARGON2_ENCODING_FAIL; + } + + if (validation_result != ARGON2_OK) { + return validation_result; + } + + + SS("$"); + SS(type_string); + + SS("$m="); + SX(ctx->m_cost); + SS(",t="); + SX(ctx->t_cost); + SS(",p="); + SX(ctx->lanes); + + SS("$"); + SB(ctx->salt, ctx->saltlen); + + SS("$"); + SB(ctx->out, ctx->outlen); + return ARGON2_OK; + +#undef SS +#undef SX +#undef SB +} + +size_t b64len(uint32_t len) { + size_t olen = ((size_t)len / 3) << 2; + + switch (len % 3) { + case 2: + olen++; + /* fall through */ + case 1: + olen += 2; + break; + } + + return olen; +} + +size_t numlen(uint32_t num) { + size_t len = 1; + while (num >= 10) { + ++len; + num = num / 10; 
+ } + return len; +} diff --git a/stratum/algos/ar2/encoding.h b/stratum/algos/ar2/encoding.h new file mode 100644 index 000000000..580af7580 --- /dev/null +++ b/stratum/algos/ar2/encoding.h @@ -0,0 +1,57 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef ENCODING_H +#define ENCODING_H +#include "argon2.h" + +#define ARGON2_MAX_DECODED_LANES UINT32_C(255) +#define ARGON2_MIN_DECODED_SALT_LEN UINT32_C(8) +#define ARGON2_MIN_DECODED_OUT_LEN UINT32_C(12) + +/* +* encode an Argon2 hash string into the provided buffer. 'dst_len' +* contains the size, in characters, of the 'dst' buffer; if 'dst_len' +* is less than the number of required characters (including the +* terminating 0), then this function returns ARGON2_ENCODING_FAIL. +* +* on success, ARGON2_OK is returned. +*/ +int encode_string(char *dst, size_t dst_len, argon2_context *ctx, + argon2_type type); + +/* +* Decodes an Argon2 hash string into the provided structure 'ctx'. +* The only fields that must be set prior to this call are ctx.saltlen and +* ctx.outlen (which must be the maximal salt and out length values that are +* allowed), ctx.salt and ctx.out (which must be buffers of the specified +* length), and ctx.pwd and ctx.pwdlen which must hold a valid password. +* +* Invalid input string causes an error. On success, the ctx is valid and all +* fields have been initialized.
+* +* Returned value is ARGON2_OK on success, other ARGON2_ codes on error. +*/ +int decode_string(argon2_context *ctx, const char *str, argon2_type type); + +/* Returns the length of the encoded byte stream with length len */ +size_t b64len(uint32_t len); + +/* Returns the length of the encoded number num */ +size_t numlen(uint32_t num); + +#endif \ No newline at end of file diff --git a/stratum/algos/ar2/genkat.c b/stratum/algos/ar2/genkat.c deleted file mode 100644 index 7aa482d88..000000000 --- a/stratum/algos/ar2/genkat.c +++ /dev/null @@ -1,182 +0,0 @@ -#include -#include -#include -#include - -#include "argon2.h" -#include "cores.h" - -void initial_kat(const uint8_t *blockhash, const argon2_context *context, - argon2_type type) { - unsigned i; - - if (blockhash != NULL && context != NULL) { - printf("======================================="); - - switch (type) { - case Argon2_d: - printf("Argon2d\n"); - break; - - case Argon2_i: - printf("Argon2i\n"); - break; - - default: - break; - } - - printf("Memory: %u KiB, Iterations: %u, Parallelism: %u lanes, Tag " - "length: %u bytes\n", - context->m_cost, context->t_cost, context->lanes, - context->outlen); - - printf("Password[%u]: ", context->pwdlen); - - if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { - printf("CLEARED\n"); - } else { - for (i = 0; i < context->pwdlen; ++i) { - printf("%2.2x ", ((unsigned char *)context->pwd)[i]); - } - - printf("\n"); - } - - printf("Salt[%u]: ", context->saltlen); - - for (i = 0; i < context->saltlen; ++i) { - printf("%2.2x ", ((unsigned char *)context->salt)[i]); - } - - printf("\n"); - - printf("Secret[%u]: ", context->secretlen); - - if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { - printf("CLEARED\n"); - } else { - for (i = 0; i < context->secretlen; ++i) { - printf("%2.2x ", ((unsigned char *)context->secret)[i]); - } - - printf("\n"); - } - - printf("Associated data[%u]: ", context->adlen); - - for (i = 0; i < context->adlen; ++i) { - printf("%2.2x ", ((unsigned 
char *)context->ad)[i]); - } - - printf("\n"); - - printf("Pre-hashing digest: "); - - for (i = 0; i < ARGON2_PREHASH_DIGEST_LENGTH; ++i) { - printf("%2.2x ", ((unsigned char *)blockhash)[i]); - } - - printf("\n"); - } -} - -void print_tag(const void *out, uint32_t outlen) { - unsigned i; - if (out != NULL) { - printf("Tag: "); - - for (i = 0; i < outlen; ++i) { - printf("%2.2x ", ((uint8_t *)out)[i]); - } - - printf("\n"); - } -} - -void internal_kat(const argon2_instance_t *instance, uint32_t pass) { - - if (instance != NULL) { - uint32_t i, j; - printf("\n After pass %u:\n", pass); - - for (i = 0; i < instance->memory_blocks; ++i) { - uint32_t how_many_words = - (instance->memory_blocks > ARGON2_WORDS_IN_BLOCK) - ? 1 - : ARGON2_WORDS_IN_BLOCK; - - for (j = 0; j < how_many_words; ++j) - printf("Block %.4u [%3u]: %016" PRIx64 "\n", i, j, - instance->memory[i].v[j]); - } - } -} - -static void fatal(const char *error) { - fprintf(stderr, "Error: %s\n", error); - exit(1); -} - -static void generate_testvectors(const char *type) { -#define TEST_OUTLEN 32 -#define TEST_PWDLEN 32 -#define TEST_SALTLEN 16 -#define TEST_SECRETLEN 8 -#define TEST_ADLEN 12 - argon2_context context; - - unsigned char out[TEST_OUTLEN]; - unsigned char pwd[TEST_PWDLEN]; - unsigned char salt[TEST_SALTLEN]; - unsigned char secret[TEST_SECRETLEN]; - unsigned char ad[TEST_ADLEN]; - const allocate_fptr myown_allocator = NULL; - const deallocate_fptr myown_deallocator = NULL; - - unsigned t_cost = 3; - unsigned m_cost = 16; - unsigned lanes = 4; - - memset(pwd, 1, TEST_OUTLEN); - memset(salt, 2, TEST_SALTLEN); - memset(secret, 3, TEST_SECRETLEN); - memset(ad, 4, TEST_ADLEN); - - context.out = out; - context.outlen = TEST_OUTLEN; - context.pwd = pwd; - context.pwdlen = TEST_PWDLEN; - context.salt = salt; - context.saltlen = TEST_SALTLEN; - context.secret = secret; - context.secretlen = TEST_SECRETLEN; - context.ad = ad; - context.adlen = TEST_ADLEN; - context.t_cost = t_cost; - context.m_cost = 
m_cost; - context.lanes = lanes; - context.threads = lanes; - context.allocate_cbk = myown_allocator; - context.free_cbk = myown_deallocator; - context.flags = 0; - -#undef TEST_OUTLEN -#undef TEST_PWDLEN -#undef TEST_SALTLEN -#undef TEST_SECRETLEN -#undef TEST_ADLEN - - if (!strcmp(type, "d")) { - argon2d(&context); - } else if (!strcmp(type, "i")) { - argon2i(&context); - } else - fatal("wrong Argon2 type"); -} - -int main(int argc, char *argv[]) { - const char *type = (argc > 1) ? argv[1] : "i"; - generate_testvectors(type); - return ARGON2_OK; -} diff --git a/stratum/algos/ar2/genkat.h b/stratum/algos/ar2/genkat.h deleted file mode 100644 index 9c776bf52..000000000 --- a/stratum/algos/ar2/genkat.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . 
- */ - -#ifndef ARGON2_KAT_H -#define ARGON2_KAT_H - -/* - * Initial KAT function that prints the inputs to the file - * @param blockhash Array that contains pre-hashing digest - * @param context Holds inputs - * @param type Argon2 type - * @pre blockhash must point to INPUT_INITIAL_HASH_LENGTH bytes - * @pre context member pointers must point to allocated memory of size according - * to the length values - */ -void initial_kat(const uint8_t *blockhash, const argon2_context *context, - argon2_type type); - -/* - * Function that prints the output tag - * @param out output array pointer - * @param outlen digest length - * @pre out must point to @a outlen bytes - **/ -void print_tag(const void *out, uint32_t outlen); - -/* - * Function that prints the internal state at given moment - * @param instance pointer to the current instance - * @param pass current pass number - * @pre instance must have necessary memory allocated - **/ -void internal_kat(const argon2_instance_t *instance, uint32_t pass); - -#endif diff --git a/stratum/algos/ar2/opt.c b/stratum/algos/ar2/opt.c index 755a89444..2ff7b3f50 100644 --- a/stratum/algos/ar2/opt.c +++ b/stratum/algos/ar2/opt.c @@ -1,150 +1,214 @@ /* - * Argon2 source code package + * Argon2 reference source code package - reference C implementations * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . 
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. */ #include #include #include -#include -#include - -#include #include "argon2.h" -#include "cores.h" -#include "opt.h" - -#include "blake2/blake2.h" -#include "blake2/blamka-round-opt.h" - -void fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block) -{ - __m128i block_XY[ARGON2_QWORDS_IN_BLOCK] __attribute__ ((aligned (16))); - uint32_t i; - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; i++) { - block_XY[i] = state[i] = _mm_xor_si128( - state[i], _mm_load_si128(&ref_block[i])); - } - - BLAKE2_ROUND(state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7]); - BLAKE2_ROUND(state[8], state[9], state[10], state[11], state[12], state[13], state[14], state[15]); - BLAKE2_ROUND(state[16], state[17], state[18], state[19], state[20], state[21], state[22], state[23]); - BLAKE2_ROUND(state[24], state[25], state[26], state[27], state[28], state[29], state[30], state[31]); - BLAKE2_ROUND(state[32], state[33], state[34], state[35], state[36], state[37], state[38], state[39]); - BLAKE2_ROUND(state[40], state[41], state[42], state[43], state[44], state[45], state[46], state[47]); - BLAKE2_ROUND(state[48], state[49], state[50], state[51], state[52], state[53], state[54], state[55]); - BLAKE2_ROUND(state[56], state[57], state[58], state[59], state[60], state[61], state[62], state[63]); - /*for (i = 0; i < 8; ++i) { - BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], - state[8 * i + 3], state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - }*/ - - BLAKE2_ROUND(state[0], state[8], state[16], state[24], state[32], state[40], state[48], state[56]); - BLAKE2_ROUND(state[1], state[9], state[17], state[25], state[33], state[41], state[49], 
state[57]); - BLAKE2_ROUND(state[2], state[10], state[18], state[26], state[34], state[42], state[50], state[58]); - BLAKE2_ROUND(state[3], state[11], state[19], state[27], state[35], state[43], state[51], state[59]); - BLAKE2_ROUND(state[4], state[12], state[20], state[28], state[36], state[44], state[52], state[60]); - BLAKE2_ROUND(state[5], state[13], state[21], state[29], state[37], state[45], state[53], state[61]); - BLAKE2_ROUND(state[6], state[14], state[22], state[30], state[38], state[46], state[54], state[62]); - BLAKE2_ROUND(state[7], state[15], state[23], state[31], state[39], state[47], state[55], state[63]); - /*for (i = 0; i < 8; ++i) { - BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], - state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - }*/ +#include "core.h" - for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; i++) { - state[i] = _mm_xor_si128(state[i], block_XY[i]); - _mm_storeu_si128(&next_block[i], state[i]); - } -} +#include "../blake2/blake2.h" +#include "../blake2/blamka-round-opt.h" -static const uint64_t bad_rands[32] = { - UINT64_C(17023632018251376180), UINT64_C(4911461131397773491), - UINT64_C(15927076453364631751), UINT64_C(7860239898779391109), +/* + * Function fills a new memory block and optionally XORs the old block over the new one. + * Memory must be initialized. + * @param state Pointer to the just produced block. Content will be updated(!) + * @param ref_block Pointer to the reference block + * @param next_block Pointer to the block to be XORed over. 
May coincide with @ref_block + * @param with_xor Whether to XOR into the new block (1) or just overwrite (0) + * @pre all block pointers must be valid + */ +#if defined(__AVX512F__) +static void fill_block(__m512i *state, const block *ref_block, + block *next_block, int with_xor) { + __m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { + state[i] = _mm512_xor_si512( + state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); + block_XY[i] = _mm512_xor_si512( + state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i)); + } + } else { + for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { + block_XY[i] = state[i] = _mm512_xor_si512( + state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); + } + } - UINT64_C(11820267568857244377), UINT64_C(12188179869468676617), - UINT64_C(3732913385414474778), UINT64_C(7651458777762572084), + for (i = 0; i < 2; ++i) { + BLAKE2_ROUND_1( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + } - UINT64_C(3062274162574341415), UINT64_C(17922653540258786897), - UINT64_C(17393848266100524980), UINT64_C(8539695715554563839), + for (i = 0; i < 2; ++i) { + BLAKE2_ROUND_2( + state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i], + state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]); + } - UINT64_C(13824538050656654359), UINT64_C(12078939433126460936), - UINT64_C(15331979418564540430), UINT64_C(12058346794217174273), + for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { + state[i] = _mm512_xor_si512(state[i], block_XY[i]); + _mm512_storeu_si512((__m512i *)next_block->v + i, state[i]); + } +} +#elif defined(__AVX2__) +static void fill_block(__m256i *state, const block *ref_block, + block *next_block, int with_xor) { + __m256i block_XY[ARGON2_HWORDS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + 
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + state[i] = _mm256_xor_si256( + state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); + block_XY[i] = _mm256_xor_si256( + state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i)); + } + } else { + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + block_XY[i] = state[i] = _mm256_xor_si256( + state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); + } + } - UINT64_C(13593922096015221049), UINT64_C(18356682276374416500), - UINT64_C(4968040514092703824), UINT64_C(11202790346130235567), + for (i = 0; i < 4; ++i) { + BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], + state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); + } - UINT64_C(2276229735041314644), UINT64_C(220837743321691382), - UINT64_C(4861211596230784273), UINT64_C(6330592584132590331), + for (i = 0; i < 4; ++i) { + BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i], + state[16 + i], state[20 + i], state[24 + i], state[28 + i]); + } - UINT64_C(3515580430960296763), UINT64_C(9869356316971855173), - UINT64_C(485533243489193056), UINT64_C(14596447761048148032), + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + state[i] = _mm256_xor_si256(state[i], block_XY[i]); + _mm256_storeu_si256((__m256i *)next_block->v + i, state[i]); + } +} +#else +static void fill_block(__m128i *state, const block *ref_block, + block *next_block, int with_xor) { + __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); + block_XY[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i *)next_block->v + i)); + } + } else { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + block_XY[i] = state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); + } + } - 
UINT64_C(16531790085730132900), UINT64_C(17328824500878824371), - UINT64_C(8548260058287621283), UINT64_C(8641748798041936364) -}; + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], + state[8 * i + 3], state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + } -void generate_addresses(const argon2_instance_t *instance, - const argon2_position_t *position, - uint64_t *pseudo_rands) -{ - uint8_t offset = position->pass * 16 + position->slice * 4; - pseudo_rands[0] = bad_rands[offset++]; - pseudo_rands[1] = bad_rands[offset++]; - pseudo_rands[2] = bad_rands[offset++]; - pseudo_rands[3] = bad_rands[offset++]; + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], + state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } - /*if ((position->pass == 1 && position->slice == 3)) - print64("pseudo_rands", pseudo_rands, 4);*/ + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + state[i] = _mm_xor_si128(state[i], block_XY[i]); + _mm_storeu_si128((__m128i *)next_block->v + i, state[i]); + } +} +#endif + +static void next_addresses(block *address_block, block *input_block) { + /*Temporary zero-initialized blocks*/ +#if defined(__AVX512F__) + __m512i zero_block[ARGON2_512BIT_WORDS_IN_BLOCK]; + __m512i zero2_block[ARGON2_512BIT_WORDS_IN_BLOCK]; +#elif defined(__AVX2__) + __m256i zero_block[ARGON2_HWORDS_IN_BLOCK]; + __m256i zero2_block[ARGON2_HWORDS_IN_BLOCK]; +#else + __m128i zero_block[ARGON2_OWORDS_IN_BLOCK]; + __m128i zero2_block[ARGON2_OWORDS_IN_BLOCK]; +#endif + + memset(zero_block, 0, sizeof(zero_block)); + memset(zero2_block, 0, sizeof(zero2_block)); + + /*Increasing index counter*/ + input_block->v[6]++; + + /*First iteration of G*/ + fill_block(zero_block, input_block, address_block, 0); + + /*Second iteration of G*/ + fill_block(zero2_block, address_block, address_block, 0); } - -#define SEGMENT_LENGTH 4 -#define LANE_LENGTH 
16 -#define POS_LANE 0 void fill_segment(const argon2_instance_t *instance, - argon2_position_t position) -{ + argon2_position_t position) { block *ref_block = NULL, *curr_block = NULL; - uint64_t pseudo_rand, ref_index; + block address_block, input_block; + uint64_t pseudo_rand, ref_index, ref_lane; uint32_t prev_offset, curr_offset; - uint8_t i; - __m128i state[64]; - int data_independent_addressing = (instance->type == Argon2_i); - - /* Pseudo-random values that determine the reference block position */ - uint64_t *pseudo_rands = NULL; - - pseudo_rands = (uint64_t *)malloc(/*sizeof(uint64_t) * 4*/32); - - if (data_independent_addressing) { - generate_addresses(instance, &position, pseudo_rands); + uint32_t starting_index, i; +#if defined(__AVX512F__) + __m512i state[ARGON2_512BIT_WORDS_IN_BLOCK]; +#elif defined(__AVX2__) + __m256i state[ARGON2_HWORDS_IN_BLOCK]; +#else + __m128i state[ARGON2_OWORDS_IN_BLOCK]; +#endif + int data_independent_addressing; + + if (instance == NULL) { + return; } - i = 0; + starting_index = 0; if ((0 == position.pass) && (0 == position.slice)) { - i = 2; /* we have already generated the first two blocks */ + starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, &input_block); + } } - /*printf("Position.lane = %d\nPosition.slice = %d\nStarting index : %d\n", position.lane, position.slice, starting_index);*/ /* Offset of the current block */ - curr_offset = position.slice * 4 + i; + curr_offset = position.lane * instance->lane_length + + position.slice * instance->segment_length + starting_index; - if (0 == curr_offset % 16) { + if (0 == curr_offset % instance->lane_length) { /* Last block in this lane */ - prev_offset = curr_offset + /*instance->lane_length - 1*/15; + prev_offset = curr_offset + instance->lane_length - 1; } else { /* Previous block */ prev_offset = curr_offset - 1; @@ 
-152,34 +216,45 @@ void fill_segment(const argon2_instance_t *instance, memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); - for (; i < SEGMENT_LENGTH; + for (i = starting_index; i < instance->segment_length; ++i, ++curr_offset, ++prev_offset) { /*1.1 Rotating prev_offset if needed */ - if (curr_offset % LANE_LENGTH == 1) { + if (curr_offset % instance->lane_length == 1) { prev_offset = curr_offset - 1; } /* 1.2 Computing the index of the reference block */ /* 1.2.1 Taking pseudo-random value from the previous block */ if (data_independent_addressing) { - pseudo_rand = pseudo_rands[i]; + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; } else { pseudo_rand = instance->memory[prev_offset].v[0]; } /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } /* 1.2.3 Computing the number of possible reference block within the * lane. 
*/ position.index = i; - ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,1); + ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); /* 2 Creating a new block */ - ref_block = instance->memory + ref_index; + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; curr_block = instance->memory + curr_offset; - fill_block(state, (__m128i const *)ref_block->v, (__m128i *)curr_block->v); - } + + fill_block(state, ref_block, curr_block, 0); - free(pseudo_rands); -} + } +} \ No newline at end of file diff --git a/stratum/algos/ar2/opt.h b/stratum/algos/ar2/opt.h deleted file mode 100644 index ec89b960e..000000000 --- a/stratum/algos/ar2/opt.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#ifndef ARGON2_OPT_H -#define ARGON2_OPT_H - -/* - * Function fills a new memory block. Differs from the - * @param state Pointer to the just produced block. Content will be updated(!) 
- * @param ref_block Pointer to the reference block - * @param next_block Pointer to the block to be constructed - * @pre all block pointers must be valid - */ -void fill_block(__m128i *state, __m128i const *ref_block, __m128i *next_block); - -/* - * Generate pseudo-random values to reference blocks in the segment and puts - * them into the array - * @param instance Pointer to the current instance - * @param position Pointer to the current position - * @param pseudo_rands Pointer to the array of 64-bit values - * @pre pseudo_rands must point to @a instance->segment_length allocated values - */ -void generate_addresses(const argon2_instance_t *instance, - const argon2_position_t *position, - uint64_t *pseudo_rands); - -/* - * Function that fills the segment using previous segments also from other - * threads. - * Identical to the reference code except that it calls optimized FillBlock() - * @param instance Pointer to the current instance - * @param position Current position - * @pre all block pointers must be valid - */ -void fill_segment(const argon2_instance_t *instance, - argon2_position_t position); - -#endif /* ARGON2_OPT_H */ diff --git a/stratum/algos/ar2/ref.c b/stratum/algos/ar2/ref.c deleted file mode 100644 index 98ae07c9a..000000000 --- a/stratum/algos/ar2/ref.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . 
- */ - -#include -#include -#include - -#include "argon2.h" -#include "cores.h" -#include "ref.h" - -#include "blake2/blamka-round-ref.h" -#include "blake2/blake2-impl.h" -#include "blake2/blake2.h" - -void fill_block(const block *prev_block, const block *ref_block, - block *next_block) { - block blockR, block_tmp; - unsigned i; - - copy_block(&blockR, ref_block); - xor_block(&blockR, prev_block); - copy_block(&block_tmp, &blockR); - - /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then - (16,17,..31)... finally (112,113,...127) */ - for (i = 0; i < 8; ++i) { - BLAKE2_ROUND_NOMSG( - blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2], - blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5], - blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8], - blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11], - blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14], - blockR.v[16 * i + 15]); - } - - /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then - (2,3,18,19,...,114,115).. 
finally (14,15,30,31,...,126,127) */ - for (i = 0; i < 8; i++) { - BLAKE2_ROUND_NOMSG( - blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16], - blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33], - blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64], - blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81], - blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112], - blockR.v[2 * i + 113]); - } - - copy_block(next_block, &block_tmp); - xor_block(next_block, &blockR); -} - -void generate_addresses(const argon2_instance_t *instance, - const argon2_position_t *position, - uint64_t *pseudo_rands) { - block zero_block, input_block, address_block; - uint32_t i; - - init_block_value(&zero_block, 0); - init_block_value(&input_block, 0); - init_block_value(&address_block, 0); - - if (instance != NULL && position != NULL) { - input_block.v[0] = position->pass; - input_block.v[1] = position->lane; - input_block.v[2] = position->slice; - input_block.v[3] = 16; - input_block.v[4] = 2; - input_block.v[5] = instance->type; - - for (i = 0; i < 4; ++i) { - if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { - input_block.v[6]++; - fill_block(&zero_block, &input_block, &address_block); - fill_block(&zero_block, &address_block, &address_block); - } - - pseudo_rands[i] = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; - } - } -} - -void fill_segment(const argon2_instance_t *instance, - argon2_position_t position) { - block *ref_block = NULL, *curr_block = NULL; - uint64_t pseudo_rand, ref_index, ref_lane; - uint32_t prev_offset, curr_offset; - uint32_t starting_index; - uint32_t i; - int data_independent_addressing = (instance->type == Argon2_i); - /* Pseudo-random values that determine the reference block position */ - uint64_t *pseudo_rands = NULL; - - if (instance == NULL) { - return; - } - - pseudo_rands = - (uint64_t *)malloc(sizeof(uint64_t) * 4); - - if (pseudo_rands == NULL) { - return; - } - - if (data_independent_addressing) { - 
generate_addresses(instance, &position, pseudo_rands); - } - - starting_index = 0; - - if ((0 == position.pass) && (0 == position.slice)) { - starting_index = 2; /* we have already generated the first two blocks */ - } - - /* Offset of the current block */ - curr_offset = position.lane * 16 + - position.slice * 4 + starting_index; - - if (0 == curr_offset % 16) { - /* Last block in this lane */ - prev_offset = curr_offset + 16 - 1; - } else { - /* Previous block */ - prev_offset = curr_offset - 1; - } - - for (i = starting_index; i < 4; ++i, ++curr_offset, ++prev_offset) { - /*1.1 Rotating prev_offset if needed */ - if (curr_offset % 16 == 1) { - prev_offset = curr_offset - 1; - } - - /* 1.2 Computing the index of the reference block */ - /* 1.2.1 Taking pseudo-random value from the previous block */ - if (data_independent_addressing) { - pseudo_rand = pseudo_rands[i]; - } else { - pseudo_rand = instance->memory[prev_offset].v[0]; - } - - /* 1.2.2 Computing the lane of the reference block */ - ref_lane = ((pseudo_rand >> 32)) % 1; - - if ((position.pass == 0) && (position.slice == 0)) { - /* Can not reference other lanes yet */ - ref_lane = position.lane; - } - - /* 1.2.3 Computing the number of possible reference block within the - * lane. - */ - position.index = i; - ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, - ref_lane == position.lane); - - /* 2 Creating a new block */ - ref_block = - instance->memory + 16 * ref_lane + ref_index; - curr_block = instance->memory + curr_offset; - fill_block(instance->memory + prev_offset, ref_block, curr_block); - } - - free(pseudo_rands); -} diff --git a/stratum/algos/ar2/ref.h b/stratum/algos/ar2/ref.h deleted file mode 100644 index 7ee22eef9..000000000 --- a/stratum/algos/ar2/ref.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. 
- * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . - */ - -#ifndef ARGON2_REF_H -#define ARGON2_REF_H - -/* - * Function fills a new memory block - * @param prev_block Pointer to the previous block - * @param ref_block Pointer to the reference block - * @param next_block Pointer to the block to be constructed - * @pre all block pointers must be valid - */ -void fill_block(const block *prev_block, const block *ref_block, - block *next_block); - -/* - * Generate pseudo-random values to reference blocks in the segment and puts - * them into the array - * @param instance Pointer to the current instance - * @param position Pointer to the current position - * @param pseudo_rands Pointer to the array of 64-bit values - * @pre pseudo_rands must point to @a instance->segment_length allocated values - */ -void generate_addresses(const argon2_instance_t *instance, - const argon2_position_t *position, - uint64_t *pseudo_rands); - -/* - * Function that fills the segment using previous segments also from other - * threads - * @param instance Pointer to the current instance - * @param position Current position - * @pre all block pointers must be valid - */ -void fill_segment(const argon2_instance_t *instance, - argon2_position_t position); - -#endif /* ARGON2_REF_H */ diff --git a/stratum/algos/ar2/run.c b/stratum/algos/ar2/run.c deleted file mode 100644 index 2b1b30a37..000000000 --- a/stratum/algos/ar2/run.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Argon2 source code package - * - * Written by Daniel Dinu and Dmitry Khovratovich, 2015 - * - * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. - * - * You should have received a copy of the CC0 Public Domain Dedication along - * with - * this software. If not, see - * . 
- */ - -#include -#include -#include -#include -#include -#include - -#include "argon2.h" -#include "cores.h" - -#define T_COST_DEF 3 -#define LOG_M_COST_DEF 12 /* 2^12 = 4 MiB */ -#define LANES_DEF 1 -#define THREADS_DEF 1 -#define OUT_LEN 32 -#define SALT_LEN 16 - -#define UNUSED_PARAMETER(x) (void)(x) - -static void usage(const char *cmd) { - printf("Usage: %s pwd salt [-y version] [-t iterations] [-m memory] [-p " - "parallelism]\n", - cmd); - - printf("Parameters:\n"); - printf("\tpwd\t\tThe password to hash\n"); - printf("\tsalt\t\tThe salt to use, at most 16 characters\n"); - printf("\t-d\t\tUse Argon2d instead of Argon2i (which is the default)\n"); - printf("\t-t N\t\tSets the number of iterations to N (default = %d)\n", - T_COST_DEF); - printf("\t-m N\t\tSets the memory usage of 2^N KiB (default %d)\n", - LOG_M_COST_DEF); - printf("\t-p N\t\tSets parallelism to N threads (default %d)\n", - THREADS_DEF); -} - -static void fatal(const char *error) { - fprintf(stderr, "Error: %s\n", error); - exit(1); -} - -/* -Runs Argon2 with certain inputs and parameters, inputs not cleared. 
Prints the -Base64-encoded hash string -@out output array with at least 32 bytes allocated -@pwd NULL-terminated string, presumably from argv[] -@salt salt array with at least SALTLEN_DEF bytes allocated -@t_cost number of iterations -@m_cost amount of requested memory in KB -@lanes amount of requested parallelism -@threads actual parallelism -@type String, only "d" and "i" are accepted -*/ -static void run(uint8_t *out, char *pwd, uint8_t *salt, uint32_t t_cost, - uint32_t m_cost, uint32_t lanes, uint32_t threads, - const char *type) { - clock_t start_time, stop_time; - unsigned pwd_length; - argon2_context context; - int i; - - start_time = clock(); - - if (!pwd) { - fatal("password missing"); - } - - if (!salt) { - secure_wipe_memory(pwd, strlen(pwd)); - fatal("salt missing"); - } - - pwd_length = strlen(pwd); - - UNUSED_PARAMETER(threads); - - context.out = out; - context.outlen = OUT_LEN; - context.pwd = (uint8_t *)pwd; - context.pwdlen = pwd_length; - context.salt = salt; - context.saltlen = SALT_LEN; - context.secret = NULL; - context.secretlen = 0; - context.ad = NULL; - context.adlen = 0; - context.t_cost = t_cost; - context.m_cost = m_cost; - context.lanes = lanes; - context.threads = lanes; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - context.flags = ARGON2_FLAG_CLEAR_PASSWORD; - - if (!strcmp(type, "d")) { - int result = argon2d(&context); - if (result != ARGON2_OK) - fatal(error_message(result)); - } else if (!strcmp(type, "i")) { - int result = argon2i(&context); - if (result != ARGON2_OK) - fatal(error_message(result)); - } else { - secure_wipe_memory(pwd, strlen(pwd)); - fatal("wrong Argon2 type"); - } - - stop_time = clock(); - - /* add back when proper decoding */ - /* - char encoded[300]; - encode_string(encoded, sizeof encoded, &context); - printf("%s\n", encoded); - */ - printf("Hash:\t\t"); - for (i = 0; i < context.outlen; ++i) { - printf("%02x", context.out[i]); - } - printf("\n"); - - printf("%2.3f seconds\n", - 
((double)stop_time - start_time) / (CLOCKS_PER_SEC)); -} - -int main(int argc, char *argv[]) { - unsigned char out[OUT_LEN]; - uint32_t m_cost = 1 << LOG_M_COST_DEF; - uint32_t t_cost = T_COST_DEF; - uint32_t lanes = LANES_DEF; - uint32_t threads = THREADS_DEF; - char *pwd = NULL; - uint8_t salt[SALT_LEN]; - const char *type = "i"; - int i; - - if (argc < 3) { - usage(argv[0]); - return ARGON2_MISSING_ARGS; - } - - /* get password and salt from command line */ - pwd = argv[1]; - if (strlen(argv[2]) > SALT_LEN) { - fatal("salt too long"); - } - memset(salt, 0x00, SALT_LEN); /* pad with null bytes */ - memcpy(salt, argv[2], strlen(argv[2])); - - /* parse options */ - for (i = 3; i < argc; i++) { - const char *a = argv[i]; - unsigned long input = 0; - if (!strcmp(a, "-m")) { - if (i < argc - 1) { - i++; - input = strtoul(argv[i], NULL, 10); - if (input == 0 || input == ULONG_MAX || - input > ARGON2_MAX_MEMORY_BITS) { - fatal("bad numeric input for -m"); - } - m_cost = ARGON2_MIN(UINT64_C(1) << input, UINT32_C(0xFFFFFFFF)); - if (m_cost > ARGON2_MAX_MEMORY) { - fatal("m_cost overflow"); - } - continue; - } else { - fatal("missing -m argument"); - } - } else if (!strcmp(a, "-t")) { - if (i < argc - 1) { - i++; - input = strtoul(argv[i], NULL, 10); - if (input == 0 || input == ULONG_MAX || - input > ARGON2_MAX_TIME) { - fatal("bad numeric input for -t"); - } - t_cost = input; - continue; - } else { - fatal("missing -t argument"); - } - } else if (!strcmp(a, "-p")) { - if (i < argc - 1) { - i++; - input = strtoul(argv[i], NULL, 10); - if (input == 0 || input == ULONG_MAX || - input > ARGON2_MAX_THREADS || input > ARGON2_MAX_LANES) { - fatal("bad numeric input for -p"); - } - threads = input; - lanes = threads; - continue; - } else { - fatal("missing -p argument"); - } - } else if (!strcmp(a, "-d")) { - type = "d"; - } else { - fatal("unknown argument"); - } - } - printf("Type:\t\tArgon2%c\n", type[0]); - printf("Iterations:\t%" PRIu32 " \n", t_cost); - 
printf("Memory:\t\t%" PRIu32 " KiB\n", m_cost); - printf("Parallelism:\t%" PRIu32 " \n", lanes); - run(out, pwd, salt, t_cost, m_cost, lanes, threads, type); - - return ARGON2_OK; -} diff --git a/stratum/algos/ar2/thread.c b/stratum/algos/ar2/thread.c new file mode 100644 index 000000000..75d71db37 --- /dev/null +++ b/stratum/algos/ar2/thread.c @@ -0,0 +1,57 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#if !defined(ARGON2_NO_THREADS) + +#include "thread.h" +#if defined(_WIN32) +#include +#endif + +int argon2_thread_create(argon2_thread_handle_t *handle, + argon2_thread_func_t func, void *args) { + if (NULL == handle || func == NULL) { + return -1; + } +#if defined(_WIN32) + *handle = _beginthreadex(NULL, 0, func, args, 0, NULL); + return *handle != 0 ? 0 : -1; +#else + return pthread_create(handle, NULL, func, args); +#endif +} + +int argon2_thread_join(argon2_thread_handle_t handle) { +#if defined(_WIN32) + if (WaitForSingleObject((HANDLE)handle, INFINITE) == WAIT_OBJECT_0) { + return CloseHandle((HANDLE)handle) != 0 ? 
0 : -1; + } + return -1; +#else + return pthread_join(handle, NULL); +#endif +} + +void argon2_thread_exit(void) { +#if defined(_WIN32) + _endthreadex(0); +#else + pthread_exit(NULL); +#endif +} + +#endif /* ARGON2_NO_THREADS */ \ No newline at end of file diff --git a/stratum/algos/ar2/thread.h b/stratum/algos/ar2/thread.h new file mode 100644 index 000000000..098ba6430 --- /dev/null +++ b/stratum/algos/ar2/thread.h @@ -0,0 +1,67 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef ARGON2_THREAD_H +#define ARGON2_THREAD_H + +#if !defined(ARGON2_NO_THREADS) + +/* + Here we implement an abstraction layer for the simple requirements + of the Argon2 code. We only require 3 primitives---thread creation, + joining, and termination---so full emulation of the pthreads API + is unwarranted. Currently we wrap pthreads and Win32 threads. + + The API defines 2 types: the function pointer type, + argon2_thread_func_t, + and the type of the thread handle---argon2_thread_handle_t. +*/ +#if defined(_WIN32) +#include +typedef unsigned(__stdcall *argon2_thread_func_t)(void *); +typedef uintptr_t argon2_thread_handle_t; +#else +#include +typedef void *(*argon2_thread_func_t)(void *); +typedef pthread_t argon2_thread_handle_t; +#endif + +/* Creates a thread + * @param handle pointer to a thread handle, which is the output of this + * function. Must not be NULL.
+ * @param func A function pointer for the thread's entry point. Must not be + * NULL. + * @param args Pointer that is passed as an argument to @func. May be NULL. + * @return 0 if @handle and @func are valid pointers and a thread is successfully + * created. + */ +int argon2_thread_create(argon2_thread_handle_t *handle, + argon2_thread_func_t func, void *args); + +/* Waits for a thread to terminate + * @param handle Handle to a thread created with argon2_thread_create. + * @return 0 if @handle is a valid handle, and joining completed successfully. +*/ +int argon2_thread_join(argon2_thread_handle_t handle); + +/* Terminate the current thread. Must be run inside a thread created by + * argon2_thread_create. +*/ +void argon2_thread_exit(void); + +#endif /* ARGON2_NO_THREADS */ +#endif \ No newline at end of file diff --git a/stratum/algos/argon2a.c b/stratum/algos/argon2a.c index eab0d81a9..e32b0b8f1 100644 --- a/stratum/algos/argon2a.c +++ b/stratum/algos/argon2a.c @@ -5,8 +5,9 @@ #include "sysendian.h" +#include "argon2a.h" #include "ar2/argon2.h" -#include "ar2/cores.h" +#include "ar2/core.h" #include "ar2/ar2-scrypt-jane.h" #define _ALIGN(x) __attribute__ ((aligned(x))) @@ -24,10 +25,10 @@ inline void argon_call(void *out, void *in, void *salt, int type) context.pwd = (uint8_t *)in; context.salt = (uint8_t *)salt; - argon2_core(&context, type); + argon2_ctx(&context, type); } -void argon2_hash(const char* input, char* output, uint32_t len) +void argon2a_hash(const char* input, char* output, uint32_t len) { uint32_t _ALIGN(32) hashA[8], hashB[8]; diff --git a/stratum/algos/argon2a.h b/stratum/algos/argon2a.h index 05238ffea..2cfe3f0d5 100644 --- a/stratum/algos/argon2a.h +++ b/stratum/algos/argon2a.h @@ -7,7 +7,7 @@ extern "C" { #include -void argon2_hash(const char* input, char* output, uint32_t len); +void argon2a_hash(const char* input, char* output, uint32_t len); #ifdef __cplusplus } diff --git a/stratum/algos/argon2d-dyn.c b/stratum/algos/argon2d-dyn.c new
file mode 100644 index 000000000..c9c6a5677 --- /dev/null +++ b/stratum/algos/argon2d-dyn.c @@ -0,0 +1,43 @@ +#include +#include +#include +#include + +#include "sysendian.h" + +#include "ar2/argon2.h" +#include "ar2/core.h" + +static const size_t INPUT_BYTES = 80; // Length of a block header in bytes. Input Length = Salt Length (salt = input) +static const size_t OUTPUT_BYTES = 32; // Length of output needed for a 256-bit hash +static const unsigned int DEFAULT_ARGON2_FLAG = 2; //Same as ARGON2_DEFAULT_FLAGS + +void argon2d_call(const void *input, void *output) +{ + argon2_context context; + context.out = (uint8_t *)output; + context.outlen = (uint32_t)OUTPUT_BYTES; + context.pwd = (uint8_t *)input; + context.pwdlen = (uint32_t)INPUT_BYTES; + context.salt = (uint8_t *)input; //salt = input + context.saltlen = (uint32_t)INPUT_BYTES; + context.secret = NULL; + context.secretlen = 0; + context.ad = NULL; + context.adlen = 0; + context.allocate_cbk = NULL; + context.free_cbk = NULL; + context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS + // main configurable Argon2 hash parameters + context.m_cost = 500; // Memory in KiB (500 KiB requested) + context.lanes = 8; // Degree of Parallelism + context.threads = 1; // Threads + context.t_cost = 2; // Iterations + + argon2_ctx(&context, Argon2_d); +} + +void argon2d_dyn_hash(const unsigned char* input, unsigned char* output, unsigned int len) +{ + argon2d_call(input, output); +} \ No newline at end of file diff --git a/stratum/algos/argon2d-dyn.h b/stratum/algos/argon2d-dyn.h new file mode 100644 index 000000000..1d5f99ca6 --- /dev/null +++ b/stratum/algos/argon2d-dyn.h @@ -0,0 +1,16 @@ +#ifndef ARGON2D_H +#define ARGON2D_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +void argon2d_dyn_hash(const char* input, char* output, unsigned int len); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/stratum/algos/ar2/blake2/blake2-impl.h b/stratum/algos/blake2/blake2-impl.h similarity index
82% rename from stratum/algos/ar2/blake2/blake2-impl.h rename to stratum/algos/blake2/blake2-impl.h index 115a192db..241f0beb3 100644 --- a/stratum/algos/ar2/blake2/blake2-impl.h +++ b/stratum/algos/blake2/blake2-impl.h @@ -1,3 +1,20 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + #ifndef PORTABLE_BLAKE2_IMPL_H #define PORTABLE_BLAKE2_IMPL_H @@ -134,10 +151,6 @@ static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) { return (w >> c) | (w << (64 - c)); } -/* prevents compiler optimizing out memset() */ -static BLAKE2_INLINE void burn(void *v, size_t n) { - static void *(*const volatile memset_v)(void *, int, size_t) = &memset; - memset_v(v, 0, n); -} +void clear_internal_memory(void *v, size_t n); #endif diff --git a/stratum/algos/ar2/blake2/blake2.h b/stratum/algos/blake2/blake2.h similarity index 70% rename from stratum/algos/ar2/blake2/blake2.h rename to stratum/algos/blake2/blake2.h index 7d8d5eb51..12533d1e7 100644 --- a/stratum/algos/ar2/blake2/blake2.h +++ b/stratum/algos/blake2/blake2.h @@ -1,3 +1,20 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. 
The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + #ifndef PORTABLE_BLAKE2_H #define PORTABLE_BLAKE2_H @@ -37,10 +54,10 @@ typedef struct __blake2b_state { uint64_t h[8]; uint64_t t[2]; uint64_t f[2]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; unsigned buflen; unsigned outlen; uint8_t last_node; - uint8_t buf[BLAKE2B_BLOCKBYTES]; } blake2b_state; /* Ensure param structs have not been wrongly padded */ @@ -57,17 +74,15 @@ int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen); int blake2b_init_param(blake2b_state *S, const blake2b_param *P); int blake2b_update(blake2b_state *S, const void *in, size_t inlen); -void my_blake2b_update(blake2b_state *S, const void *in, size_t inlen); int blake2b_final(blake2b_state *S, void *out, size_t outlen); /* Simple API */ -int blake2b(void *out, const void *in, const void *key, size_t keylen); +int blake2b(void *out, size_t outlen, const void *in, size_t inlen, + const void *key, size_t keylen); /* Argon2 Team - Begin Code */ -int blake2b_long(void *out, const void *in); +int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); /* Argon2 Team - End Code */ -/* Miouyouyou */ -void blake2b_too(void *out, const void *in); #if defined(__cplusplus) } diff --git a/stratum/algos/blake2/blake2b.c b/stratum/algos/blake2/blake2b.c new file mode 100644 index 000000000..ca05df598 --- /dev/null +++ b/stratum/algos/blake2/blake2b.c @@ -0,0 +1,390 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or 
the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#include +#include +#include + +#include "blake2.h" +#include "blake2-impl.h" + +static const uint64_t blake2b_IV[8] = { + UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), + UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), + UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), + UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)}; + +static const unsigned int blake2b_sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, +}; + +static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) { + S->f[1] = (uint64_t)-1; +} + +static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) { + if (S->last_node) { + blake2b_set_lastnode(S); + } + S->f[0] = (uint64_t)-1; +} + +static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S, + uint64_t inc) { + S->t[0] += inc; + S->t[1] += (S->t[0] < inc); +} + +static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) { + 
clear_internal_memory(S, sizeof(*S)); /* wipe */ + blake2b_set_lastblock(S); /* invalidate for further use */ +} + +static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) { + memset(S, 0, sizeof(*S)); + memcpy(S->h, blake2b_IV, sizeof(S->h)); +} + +int blake2b_init_param(blake2b_state *S, const blake2b_param *P) { + const unsigned char *p = (const unsigned char *)P; + unsigned int i; + + if (NULL == P || NULL == S) { + return -1; + } + + blake2b_init0(S); + /* IV XOR Parameter Block */ + for (i = 0; i < 8; ++i) { + S->h[i] ^= load64(&p[i * sizeof(S->h[i])]); + } + S->outlen = P->digest_length; + return 0; +} + +/* Sequential blake2b initialization */ +int blake2b_init(blake2b_state *S, size_t outlen) { + blake2b_param P; + + if (S == NULL) { + return -1; + } + + if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) { + blake2b_invalidate_state(S); + return -1; + } + + /* Setup Parameter Block for unkeyed BLAKE2 */ + P.digest_length = (uint8_t)outlen; + P.key_length = 0; + P.fanout = 1; + P.depth = 1; + P.leaf_length = 0; + P.node_offset = 0; + P.node_depth = 0; + P.inner_length = 0; + memset(P.reserved, 0, sizeof(P.reserved)); + memset(P.salt, 0, sizeof(P.salt)); + memset(P.personal, 0, sizeof(P.personal)); + + return blake2b_init_param(S, &P); +} + +int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, + size_t keylen) { + blake2b_param P; + + if (S == NULL) { + return -1; + } + + if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) { + blake2b_invalidate_state(S); + return -1; + } + + if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) { + blake2b_invalidate_state(S); + return -1; + } + + /* Setup Parameter Block for keyed BLAKE2 */ + P.digest_length = (uint8_t)outlen; + P.key_length = (uint8_t)keylen; + P.fanout = 1; + P.depth = 1; + P.leaf_length = 0; + P.node_offset = 0; + P.node_depth = 0; + P.inner_length = 0; + memset(P.reserved, 0, sizeof(P.reserved)); + memset(P.salt, 0, sizeof(P.salt)); + memset(P.personal, 0, 
sizeof(P.personal)); + + if (blake2b_init_param(S, &P) < 0) { + blake2b_invalidate_state(S); + return -1; + } + + { + uint8_t block[BLAKE2B_BLOCKBYTES]; + memset(block, 0, BLAKE2B_BLOCKBYTES); + memcpy(block, key, keylen); + blake2b_update(S, block, BLAKE2B_BLOCKBYTES); + /* Burn the key from stack */ + clear_internal_memory(block, BLAKE2B_BLOCKBYTES); + } + return 0; +} + +static void blake2b_compress(blake2b_state *S, const uint8_t *block) { + uint64_t m[16]; + uint64_t v[16]; + unsigned int i, r; + + for (i = 0; i < 16; ++i) { + m[i] = load64(block + i * sizeof(m[i])); + } + + for (i = 0; i < 8; ++i) { + v[i] = S->h[i]; + } + + v[8] = blake2b_IV[0]; + v[9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + +#define G(r, i, a, b, c, d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while ((void)0, 0) + +#define ROUND(r) \ + do { \ + G(r, 0, v[0], v[4], v[8], v[12]); \ + G(r, 1, v[1], v[5], v[9], v[13]); \ + G(r, 2, v[2], v[6], v[10], v[14]); \ + G(r, 3, v[3], v[7], v[11], v[15]); \ + G(r, 4, v[0], v[5], v[10], v[15]); \ + G(r, 5, v[1], v[6], v[11], v[12]); \ + G(r, 6, v[2], v[7], v[8], v[13]); \ + G(r, 7, v[3], v[4], v[9], v[14]); \ + } while ((void)0, 0) + + for (r = 0; r < 12; ++r) { + ROUND(r); + } + + for (i = 0; i < 8; ++i) { + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } + +#undef G +#undef ROUND +} + +int blake2b_update(blake2b_state *S, const void *in, size_t inlen) { + const uint8_t *pin = (const uint8_t *)in; + + if (inlen == 0) { + return 0; + } + + /* Sanity check */ + if (S == NULL || in == NULL) { + return -1; + } + + /* Is this a reused state? 
*/ + if (S->f[0] != 0) { + return -1; + } + + if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { + /* Complete current block */ + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + memcpy(&S->buf[left], pin, fill); + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, S->buf); + S->buflen = 0; + inlen -= fill; + pin += fill; + /* Avoid buffer copies when possible */ + while (inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, pin); + inlen -= BLAKE2B_BLOCKBYTES; + pin += BLAKE2B_BLOCKBYTES; + } + } + memcpy(&S->buf[S->buflen], pin, inlen); + S->buflen += (unsigned int)inlen; + return 0; +} + +int blake2b_final(blake2b_state *S, void *out, size_t outlen) { + uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; + unsigned int i; + + /* Sanity checks */ + if (S == NULL || out == NULL || outlen < S->outlen) { + return -1; + } + + /* Is this a reused state? */ + if (S->f[0] != 0) { + return -1; + } + + blake2b_increment_counter(S, S->buflen); + blake2b_set_lastblock(S); + memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ + blake2b_compress(S, S->buf); + + for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ + store64(buffer + sizeof(S->h[i]) * i, S->h[i]); + } + + memcpy(out, buffer, S->outlen); + clear_internal_memory(buffer, sizeof(buffer)); + clear_internal_memory(S->buf, sizeof(S->buf)); + clear_internal_memory(S->h, sizeof(S->h)); + return 0; +} + +int blake2b(void *out, size_t outlen, const void *in, size_t inlen, + const void *key, size_t keylen) { + blake2b_state S; + int ret = -1; + + /* Verify parameters */ + if (NULL == in && inlen > 0) { + goto fail; + } + + if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) { + goto fail; + } + + if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) { + goto fail; + } + + if (keylen > 0) { + if (blake2b_init_key(&S, outlen, key, keylen) < 0) { + goto fail; + } + } else { + if (blake2b_init(&S, 
outlen) < 0) { + goto fail; + } + } + + if (blake2b_update(&S, in, inlen) < 0) { + goto fail; + } + ret = blake2b_final(&S, out, outlen); + +fail: + clear_internal_memory(&S, sizeof(S)); + return ret; +} + +/* Argon2 Team - Begin Code */ +int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) { + uint8_t *out = (uint8_t *)pout; + blake2b_state blake_state; + uint8_t outlen_bytes[sizeof(uint32_t)] = {0}; + int ret = -1; + + if (outlen > UINT32_MAX) { + goto fail; + } + + /* Ensure little-endian byte order! */ + store32(outlen_bytes, (uint32_t)outlen); + +#define TRY(statement) \ + do { \ + ret = statement; \ + if (ret < 0) { \ + goto fail; \ + } \ + } while ((void)0, 0) + + if (outlen <= BLAKE2B_OUTBYTES) { + TRY(blake2b_init(&blake_state, outlen)); + TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes))); + TRY(blake2b_update(&blake_state, in, inlen)); + TRY(blake2b_final(&blake_state, out, outlen)); + } else { + uint32_t toproduce; + uint8_t out_buffer[BLAKE2B_OUTBYTES]; + uint8_t in_buffer[BLAKE2B_OUTBYTES]; + TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES)); + TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes))); + TRY(blake2b_update(&blake_state, in, inlen)); + TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES)); + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2; + + while (toproduce > BLAKE2B_OUTBYTES) { + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer, + BLAKE2B_OUTBYTES, NULL, 0)); + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + toproduce -= BLAKE2B_OUTBYTES / 2; + } + + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL, + 0)); + memcpy(out, out_buffer, toproduce); + } +fail: + clear_internal_memory(&blake_state, sizeof(blake_state)); + return ret; +#undef 
TRY +} +/* Argon2 Team - End Code */ diff --git a/stratum/algos/blake2/blamka-round-opt.h b/stratum/algos/blake2/blamka-round-opt.h new file mode 100644 index 000000000..faf96662e --- /dev/null +++ b/stratum/algos/blake2/blamka-round-opt.h @@ -0,0 +1,476 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef BLAKE_ROUND_MKA_OPT_H +#define BLAKE_ROUND_MKA_OPT_H + +#if defined(HAVE_CONFIG_H) +#include "config/dynamic-config.h" +#endif + +#include "blake2-impl.h" + +#include +#if defined(__SSSE3__) +#include /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */ +#endif + +#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__)) +#include +#endif + +#if !defined(__AVX512F__) +#if !defined(__AVX2__) +#if !defined(__XOP__) +#if defined(__SSSE3__) +#define r16 \ + (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) +#define r24 \ + (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) \ + ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ + : (-(c) == 24) \ + ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) \ + ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) \ + ? 
_mm_xor_si128(_mm_srli_epi64((x), -(c)), \
+                                   _mm_add_epi64((x), (x))) \
+    : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
+                    _mm_slli_epi64((x), 64 - (-(c))))
+#else /* defined(__SSE2__) */
+#define _mm_roti_epi64(r, c) \
+    _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
+#endif
+#else
+#endif
+/* fBlaMka (128-bit): per 64-bit lane, x + y + 2 * lo32(x) * lo32(y); _mm_mul_epu32 multiplies only the low 32 bits of each lane. */
+static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
+    const __m128i z = _mm_mul_epu32(x, y);
+    return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
+}
+/* G1: first half of the G step on two register sets (suffix 0 and 1): a = f(a,b); d = (d ^ a) ror 32; c = f(c,d); b = (b ^ c) ror 24. */
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        A0 = fBlaMka(A0, B0); \
+        A1 = fBlaMka(A1, B1); \
+        \
+        D0 = _mm_xor_si128(D0, A0); \
+        D1 = _mm_xor_si128(D1, A1); \
+        \
+        D0 = _mm_roti_epi64(D0, -32); /* negative count = rotate right, see _mm_roti_epi64 above */ \
+        D1 = _mm_roti_epi64(D1, -32); \
+        \
+        C0 = fBlaMka(C0, D0); \
+        C1 = fBlaMka(C1, D1); \
+        \
+        B0 = _mm_xor_si128(B0, C0); \
+        B1 = _mm_xor_si128(B1, C1); \
+        \
+        B0 = _mm_roti_epi64(B0, -24); \
+        B1 = _mm_roti_epi64(B1, -24); \
+    } while ((void)0, 0)
+/* G2: second half of the G step; same data flow as G1 but with rotations by 16 and 63 bits. */
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        A0 = fBlaMka(A0, B0); \
+        A1 = fBlaMka(A1, B1); \
+        \
+        D0 = _mm_xor_si128(D0, A0); \
+        D1 = _mm_xor_si128(D1, A1); \
+        \
+        D0 = _mm_roti_epi64(D0, -16); \
+        D1 = _mm_roti_epi64(D1, -16); \
+        \
+        C0 = fBlaMka(C0, D0); \
+        C1 = fBlaMka(C1, D1); \
+        \
+        B0 = _mm_xor_si128(B0, C0); \
+        B1 = _mm_xor_si128(B1, C1); \
+        \
+        B0 = _mm_roti_epi64(B0, -63); \
+        B1 = _mm_roti_epi64(B1, -63); \
+    } while ((void)0, 0)
+/* DIAGONALIZE / UNDIAGONALIZE: re-pair the 64-bit halves of the B and D registers and swap C0 with C1 (A is never touched); SSSE3 build uses _mm_alignr_epi8, the fallback uses unpack shuffles. */
+#if defined(__SSSE3__)
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
+        __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
+        B0 = t0; \
+        B1 = t1; \
+        \
+        t0 = C0; \
+        C0 = C1; \
+        C1 = t0; \
+        \
+        t0 = _mm_alignr_epi8(D1, D0, 8); \
+        t1 = _mm_alignr_epi8(D0, D1, 8); \
+        D0 = t1; \
+        D1 = t0; \
+    } while ((void)0, 0)
+/* UNDIAGONALIZE (SSSE3): same shape as DIAGONALIZE with the _mm_alignr_epi8 operands exchanged. */
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
+        __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
+        B0 = t0; \
+        B1 = t1; \
+        \
+        t0 = C0; \
+        C0 = C1; \
+        C1 = t0; \
+        \
+        t0 = _mm_alignr_epi8(D0, D1, 8); \
+        t1 = _mm_alignr_epi8(D1, D0, 8); \
+        D0 = t1; \
+        D1 = t0; \
+    } while ((void)0, 0)
+#else /* SSE2 */
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        __m128i t0 = D0; \
+        __m128i t1 = B0; \
+        D0 = C0; /* D0 was saved in t0, so it is reused as scratch for the C0/C1 swap */ \
+        C0 = C1; \
+        C1 = D0; \
+        D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
+        D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
+        B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
+        B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
+    } while ((void)0, 0)
+/* UNDIAGONALIZE (SSE2 fallback): swaps C0/C1, then re-pairs the halves of B and D with unpack shuffles. */
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        __m128i t0, t1; \
+        t0 = C0; \
+        C0 = C1; \
+        C1 = t0; \
+        t0 = B0; \
+        t1 = D0; \
+        B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
+        B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
+        D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
+        D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
+    } while ((void)0, 0)
+#endif
+/* BLAKE2_ROUND: G1+G2, DIAGONALIZE, G1+G2, UNDIAGONALIZE. Its parameter order (A0, A1, B0, B1, ...) differs from the helper macros' (A0, B0, C0, D0, A1, ...). */
+#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+        \
+        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+        \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+        \
+        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+    } while ((void)0, 0)
+
+#else /* __AVX2__ */
+/* AVX2 variant: the same round built from 256-bit registers. */
+#include <immintrin.h>
+/* Per-64-bit-lane right rotations by 32, 24, 16 and 63 bits (63 is written as (x >> 63) ^ (x + x)). */
+#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
+#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
+#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
+#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
+/* G1_AVX2: G1 with the multiply-add written inline, set 0 first and then set 1. NOTE(review): the AVX2 macros end in "while(...);" - the ';' is part of the expansion, which is why BLAKE2_ROUND_1/2 invoke them without one. */
+#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i ml = _mm256_mul_epu32(A0, B0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+        D0 = _mm256_xor_si256(D0, A0); \
+        D0 = rotr32(D0); \
+        \
+        ml = _mm256_mul_epu32(C0, D0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+        \
+        B0 = _mm256_xor_si256(B0, C0); \
+        B0 = rotr24(B0); \
+        \
+        ml = _mm256_mul_epu32(A1, B1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+        D1 = _mm256_xor_si256(D1, A1); \
+        D1 = rotr32(D1); \
+        \
+        ml = _mm256_mul_epu32(C1, D1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+        \
+        B1 = _mm256_xor_si256(B1, C1); \
+        B1 = rotr24(B1); \
+    } while((void)0, 0);
+/* G2_AVX2: as G1_AVX2 but rotating by 16 and 63 bits. */
+#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i ml = _mm256_mul_epu32(A0, B0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+        D0 = _mm256_xor_si256(D0, A0); \
+        D0 = rotr16(D0); \
+        \
+        ml = _mm256_mul_epu32(C0, D0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+        B0 = _mm256_xor_si256(B0, C0); \
+        B0 = rotr63(B0); \
+        \
+        ml = _mm256_mul_epu32(A1, B1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+        D1 = _mm256_xor_si256(D1, A1); \
+        D1 = rotr16(D1); \
+        \
+        ml = _mm256_mul_epu32(C1, D1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+        B1 = _mm256_xor_si256(B1, C1); \
+        B1 = rotr63(B1); \
+    } while((void)0, 0);
+/* DIAGONALIZE_1: rotate the four 64-bit lanes of B, C and D by one, two and three positions inside each register (A untouched). */
+#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        \
+        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
+        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
+    } while((void)0, 0);
+/* DIAGONALIZE_2: mixes lanes across the register pair - blend B0/B1 and D0/D1 with masks 0xCC/0x33, permute the results, and swap C0 with C1. */
+#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+        B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+        \
+        tmp1 = C0; \
+        C0 = C1; \
+        C1 = tmp1; \
+        \
+        tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
+        tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
+        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+    } while(0);
+/* UNDIAGONALIZE_1: the lane rotations of DIAGONALIZE_1 with the B and D shuffle constants exchanged. */
+#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        \
+        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
+        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
+    } while((void)0, 0);
+/* UNDIAGONALIZE_2: counterpart of DIAGONALIZE_2; here the B results go to B0/B1 in order and the D blend masks are exchanged. */
+#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+        B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+        \
+        tmp1 = C0; \
+        C0 = C1; \
+        C1 = tmp1; \
+        \
+        tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
+        tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
+        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+    } while((void)0, 0);
+/* BLAKE2_ROUND_1: full round using the in-register (_1) diagonalisation. */
+#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do{ \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+        \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    } while((void)0, 0);
+/* BLAKE2_ROUND_2: full round using the cross-register (_2) diagonalisation. */
+#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do{ \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    } while((void)0, 0);
+
+#endif /* __AVX2__ */
+
+#else /* __AVX512F__ */
+/* AVX-512 variant: the same round built from 512-bit registers. */
+#include <immintrin.h>
+/* ror64: per-64-bit-lane rotate right using the native AVX-512 rotate. */
+#define ror64(x, n) _mm512_ror_epi64((x), (n))
+/* muladd: 512-bit counterpart of fBlaMka - per lane x + y + 2 * lo32(x) * lo32(y). */
+static __m512i muladd(__m512i x, __m512i y)
+{
+    __m512i z = _mm512_mul_epu32(x, y);
+    return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
+}
+/* G1 (AVX-512): a = muladd(a,b); d = (d ^ a) ror 32; c = muladd(c,d); b = (b ^ c) ror 24, for both register sets. */
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        A0 = muladd(A0, B0); \
+        A1 = muladd(A1, B1); \
+\
+        D0 = _mm512_xor_si512(D0, A0); \
+        D1 = _mm512_xor_si512(D1, A1); \
+\
+        D0 = ror64(D0, 32); \
+        D1 = ror64(D1, 32); \
+\
+        C0 = muladd(C0, D0); \
+        C1 = muladd(C1, D1); \
+\
+        B0 = _mm512_xor_si512(B0, C0); \
+        B1 = _mm512_xor_si512(B1, C1); \
+\
+        B0 = ror64(B0, 24); \
+        B1 = ror64(B1, 24); \
+    } while ((void)0, 0)
+/* G2 (AVX-512): as G1 but rotating by 16 and 63 bits. */
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        A0 = muladd(A0, B0); \
+        A1 = muladd(A1, B1); \
+\
+        D0 = _mm512_xor_si512(D0, A0); \
+        D1 = _mm512_xor_si512(D1, A1); \
+\
+        D0 = ror64(D0, 16); \
+        D1 = ror64(D1, 16); \
+\
+        C0 = muladd(C0, D0); \
+        C1 = muladd(C1, D1); \
+\
+        B0 = _mm512_xor_si512(B0, C0); \
+        B1 = _mm512_xor_si512(B1, C1); \
+\
+        B0 = ror64(B0, 63); \
+        B1 = ror64(B1, 63); \
+    } while ((void)0, 0)
+/* DIAGONALIZE (AVX-512): permute the 64-bit lanes of B, C and D with _mm512_permutex_epi64 (A untouched). */
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
+\
+        C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+\
+        D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
+    } while ((void)0, 0)
+/* UNDIAGONALIZE (AVX-512): DIAGONALIZE with the B and D shuffle constants exchanged. */
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
+\
+        C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+\
+        D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
+    } while ((void)0, 0)
+/* BLAKE2_ROUND (AVX-512): G1+G2, DIAGONALIZE, G1+G2, UNDIAGONALIZE; used by BLAKE2_ROUND_1/2 below. */
+#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+\
+        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+\
+        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+\
+        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+    } while ((void)0, 0)
+/* SWAP_HALVES: A0 receives the low 256 bits of A0 and A1, A1 receives their high 256 bits. */
+#define SWAP_HALVES(A0, A1) \
+    do { \
+        __m512i t0, t1; \
+        t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
+        t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
+        A0 = t0; \
+        A1 = t1; \
+    } while((void)0, 0)
+/* SWAP_QUARTERS: SWAP_HALVES followed by exchanging the two middle 128-bit quarters of each register. */
+#define SWAP_QUARTERS(A0, A1) \
+    do { \
+        SWAP_HALVES(A0, A1); \
+        A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
+        A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
+    } while((void)0, 0)
+/* UNSWAP_QUARTERS: the two steps of SWAP_QUARTERS applied in the opposite order. */
+#define UNSWAP_QUARTERS(A0, A1) \
+    do { \
+        A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
+        A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
+        SWAP_HALVES(A0, A1); \
+    } while((void)0, 0)
+/* BLAKE2_ROUND_1: SWAP_HALVES on four register pairs, one BLAKE2_ROUND, then the same swaps again. Note the parameter order (A0, C0, B0, D0, ...). */
+#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
+    do { \
+        SWAP_HALVES(A0, B0); \
+        SWAP_HALVES(C0, D0); \
+        SWAP_HALVES(A1, B1); \
+        SWAP_HALVES(C1, D1); \
+        BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
+        SWAP_HALVES(A0, B0); \
+        SWAP_HALVES(C0, D0); \
+        SWAP_HALVES(A1, B1); \
+        SWAP_HALVES(C1, D1); \
+    } while ((void)0, 0)
+/* BLAKE2_ROUND_2: SWAP_QUARTERS on the (X0, X1) pairs, one BLAKE2_ROUND, then UNSWAP_QUARTERS. */
+#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        SWAP_QUARTERS(A0, A1); \
+        SWAP_QUARTERS(B0, B1); \
+        SWAP_QUARTERS(C0, C1); \
+        SWAP_QUARTERS(D0, D1); \
+        BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
+        UNSWAP_QUARTERS(A0, A1); \
+        UNSWAP_QUARTERS(B0, B1); \
+        UNSWAP_QUARTERS(C0, C1); \
+        UNSWAP_QUARTERS(D0, D1); \
+    } while ((void)0, 0)
+
+#endif /* __AVX512F__ */
+#endif /* BLAKE_ROUND_MKA_OPT_H */
\ No newline at end of file
diff --git a/stratum/algos/ar2/blake2/blamka-round-ref.h b/stratum/algos/blake2/blamka-round-ref.h
similarity index 75%
rename from stratum/algos/ar2/blake2/blamka-round-ref.h
rename to stratum/algos/blake2/blamka-round-ref.h
index f497e10c2..2238959e1 100644
--- a/stratum/algos/ar2/blake2/blamka-round-ref.h
+++ b/stratum/algos/blake2/blamka-round-ref.h
@@ -1,3 +1,20 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
 #ifndef BLAKE_ROUND_MKA_H
 #define BLAKE_ROUND_MKA_H
 
@@ -36,4 +53,4 @@ static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
         G(v3, v4, v9, v14); \
     } while ((void)0, 0)
 
-#endif
+#endif
\ No newline at end of file
diff --git a/stratum/algos/makefile b/stratum/algos/makefile
index 9ca631115..205919127 100644
--- a/stratum/algos/makefile
+++ b/stratum/algos/makefile
@@ -17,8 +17,8 @@ SOURCES=lyra2re.c lyra2v2.c Lyra2.c lyra2z.c Lyra2-z.c Sponge.c allium.c \
 	scrypt.c scryptn.c sha256.c sha256t.c \
 	yescrypt.c yescrypt-opt.c sha256_Y.c \
 	a5a.c a5amath.c m7m.c magimath.cpp velvet.c \
-	argon2a.c ar2/blake2b.c ar2/argon2.c ar2/ref.c ar2/cores.c ar2/ar2-scrypt-jane.c \
-	hive.c pomelo.c hex.c \
+	argon2a.c blake2/blake2b.c ar2/argon2.c ar2/core.c ar2/encoding.c ar2/opt.c ar2/thread.c ar2/ar2-scrypt-jane.c \
+	hive.c pomelo.c hex.c argon2d-dyn.c \
 	phi.c phi2.c polytimos.c rainforest.c skunk.c sib.c veltor.c gost.c aergo.c
 
 OBJECTS=$(SOURCES:%.c=%.o) $(SOURCES:%.cpp=%.o)
@@ -41,4 +41,4 @@ $(OUTPUT): $(OBJECTS)
 clean:
 	rm -f *.o
 	rm -f ar2/*.o
-
+	rm -f blake2/*.o
diff --git a/stratum/coinbase.cpp b/stratum/coinbase.cpp
index f42879fa4..7d299ac49 100644
--- a/stratum/coinbase.cpp
+++ b/stratum/coinbase.cpp
@@ -122,6 +122,69 @@ void coinbase_create(YAAMP_COIND *coind, YAAMP_JOB_TEMPLATE *templ, json_value *
 		if (strlen(coind->charity_address) == 0)
 			sprintf(coind->charity_address, "EdFwYw4Mo2Zq6CFM2yNJgXvE2DTJxgdBRX");
 	}
+	else if(strcmp(coind->symbol, "DYN") == 0) // Dynamic: add superblock and dynode outputs ahead of the pool output
+	{
+		char script_dests[2048] = { 0 }; // filled by job_pack_tx() with the extra payee outputs; appended to coinb2 below
+		char script_payee[128] = { 0 }; // receives base58_decode() of one payee address
+		char payees[4]; // output count formatted with "%02x": room for three hex digits plus NUL
+		int npayees = (templ->has_segwit_txs) ? 2 : 1; // pool output, plus the commitment output when segwit txs are present
+		bool dynode_enabled;
+		dynode_enabled = json_get_bool(json_result, "dynode_payments_enforced");
+		bool superblocks_enabled = json_get_bool(json_result, "superblocks_enabled");
+		json_value* superblock = json_get_array(json_result, "superblock");
+		json_value* dynode;
+		dynode = json_get_object(json_result, "dynode");
+		if(!dynode && json_get_bool(json_result, "dynode_payments")) { // no "dynode" object but the legacy flag is set
+			coind->oldmasternodes = true;
+			debuglog("%s is using old dynodes rpc keys\n", coind->symbol);
+			return; // returns before anything is appended to coinb2 in this branch
+		}
+		// Superblock payees: one output per entry that has both a payee and a non-zero amount.
+		if(superblocks_enabled && superblock) {
+			for(unsigned int i = 0; i < superblock->u.array.length; i++) { // unsigned index: avoids a signed/unsigned comparison with the array length
+				const char *payee = json_get_string(superblock->u.array.values[i], "payee");
+				json_int_t amount = json_get_int(superblock->u.array.values[i], "amount");
+				if (payee && amount) {
+					npayees++;
+					available -= amount; // whatever is paid out here is deducted from the pool's share
+					base58_decode(payee, script_payee);
+					job_pack_tx(coind, script_dests, amount, script_payee);
+					//debuglog("%s superblock found %s %u\n", coind->symbol, payee, amount);
+				}
+			}
+		}
+		if (dynode_enabled && dynode) { // dynode payee: needs payee, amount and the "started" flag
+			bool started;
+			started = json_get_bool(json_result, "dynode_payments_started");
+			const char *payee = json_get_string(dynode, "payee");
+			json_int_t amount = json_get_int(dynode, "amount");
+			if (!payee)
+				debuglog("coinbase_create failed to get Dynode payee\n");
+
+			if (!amount)
+				debuglog("coinbase_create failed to get Dynode amount\n");
+
+			if (!started)
+				debuglog("coinbase_create failed to get Dynode started\n");
+
+			if (payee && amount && started) {
+				npayees++;
+				available -= amount;
+				base58_decode(payee, script_payee);
+				job_pack_tx(coind, script_dests, amount, script_payee);
+				//debuglog("%s dynode found %s %u\n", coind->symbol, payee, amount);
+			}
+		}
+		snprintf(payees, sizeof(payees), "%02x", npayees); // bounded: a count of 0x100 or more can no longer write past the buffer
+		strcat(templ->coinb2, payees);
+		if (templ->has_segwit_txs) strcat(templ->coinb2, commitment);
+		strcat(templ->coinb2, script_dests);
+		job_pack_tx(coind, templ->coinb2, available, NULL); // pool output with the remaining amount
+		strcat(templ->coinb2, "00000000"); // locktime
+		coind->reward = (double)available/100000000*coind->reward_mul;
+		//debuglog("%s %d dests %s\n", coind->symbol, npayees, script_dests);
+		return;
+	}
 	else if(strcmp(coind->symbol, "LTCR") == 0) {
 		if (coind->charity_percent <= 0)
 			coind->charity_percent = 10;
diff --git a/stratum/config.sample/argon2d-dyn.conf b/stratum/config.sample/argon2d-dyn.conf
new file mode 100644
index 000000000..fc53112c0
--- /dev/null
+++ b/stratum/config.sample/argon2d-dyn.conf
@@ -0,0 +1,15 @@
+[TCP]
+server = yaamp.com
+port = 4239
+password = tu8tu5
+
+[SQL]
+host = yaampdb
+database = yaamp
+username = root
+password = patofpaq
+
+[STRATUM]
+algo = argon2d-dyn
+difficulty = 2.0
+max_ttf = 400000000
\ No newline at end of file
diff --git a/stratum/stratum.cpp b/stratum/stratum.cpp
index 281ecbcdd..ecfa7e572 100644
--- a/stratum/stratum.cpp
+++ b/stratum/stratum.cpp
@@ -182,7 +182,8 @@ YAAMP_ALGO g_algos[] =
 	{"m7m", m7m_hash, 0x10000, 0, 0},
 	{"veltor", veltor_hash, 1, 0, 0},
 	{"velvet", velvet_hash, 0x10000, 0, 0},
-	{"argon2", argon2_hash, 0x10000, 0, sha256_hash_hex },
+	{"argon2", argon2a_hash, 0x10000, 0, sha256_hash_hex },
+	{"argon2d-dyn", argon2d_dyn_hash, 0x10000, 0, 0 }, // Dynamic Argon2d Implementation
 	{"vitalium", vitalium_hash, 1, 0, 0},
 	{"aergo", aergo_hash, 1, 0, 0},
 
diff --git a/stratum/stratum.h b/stratum/stratum.h
index 321f236d2..5f11fadcb 100644
--- a/stratum/stratum.h
+++ b/stratum/stratum.h
@@ -207,4 +207,4 @@ void sha256_double_hash_hex(const char *input, char *output, unsigned int len);
 #include "algos/vitalium.h"
 #include "algos/aergo.h"
 #include "algos/hex.h"
-
+#include "algos/argon2d-dyn.h"
diff --git a/web/yaamp/core/functions/yaamp.php b/web/yaamp/core/functions/yaamp.php
index 980d0036a..32dc69cee 100755
--- a/web/yaamp/core/functions/yaamp.php
+++ b/web/yaamp/core/functions/yaamp.php
@@ -10,6 +10,7 @@ function yaamp_get_algos()
 		'scryptn',
 		'allium',
 		'argon2',
+		'argon2d-dyn',
 		'aergo',
 		'bastion',
 		'bitcore',
@@ -107,6 +108,7 @@ function 
yaamp_get_algo_norm($algo) 'x11' => 1.0, 'x13' => 1.0, 'argon2' => 1.0, + 'argon2d-dyn' => 1.0, 'lyra2' => 1.0, 'lyra2v2' => 1.0, 'myr-gr' => 1.0, @@ -155,6 +157,7 @@ function getAlgoColors($algo) 'xevan' => '#f0b0a0', 'allium' => '#80a0d0', 'argon2' => '#e0d0e0', + 'argon2d-dyn' => '#e0d0e0', 'aergo' => '#e0d0e0', 'bastion' => '#e0b0b0', 'blake' => '#f0f0f0', @@ -237,6 +240,7 @@ function getAlgoPort($algo) 'whirlpool' => 4133, 'neoscrypt' => 4233, 'argon2' => 4234, + 'argon2d-dyn' => 4239, 'scryptn' => 4333, 'allium' => 4443, 'lyra2' => 4433,