From 548de42ecfc1d9b087af0c3a144d5f371ee21846 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Sat, 22 Jul 2017 18:03:17 +0000 Subject: [PATCH 1/9] add resizeable scratch space API Alignment support by Pieter Wuille. --- Makefile.am | 2 ++ include/secp256k1.h | 35 +++++++++++++++++++++ src/scratch.h | 35 +++++++++++++++++++++ src/scratch_impl.h | 77 +++++++++++++++++++++++++++++++++++++++++++++ src/secp256k1.c | 12 +++++++ src/tests.c | 36 +++++++++++++++++++++ src/util.h | 8 +++++ 7 files changed, 205 insertions(+) create mode 100644 src/scratch.h create mode 100644 src/scratch_impl.h diff --git a/Makefile.am b/Makefile.am index c071fbe2753c9..7603cc6261361 100644 --- a/Makefile.am +++ b/Makefile.am @@ -42,6 +42,8 @@ noinst_HEADERS += src/field_5x52_asm_impl.h noinst_HEADERS += src/java/org_bitcoin_NativeSecp256k1.h noinst_HEADERS += src/java/org_bitcoin_Secp256k1Context.h noinst_HEADERS += src/util.h +noinst_HEADERS += src/scratch.h +noinst_HEADERS += src/scratch_impl.h noinst_HEADERS += src/testrand.h noinst_HEADERS += src/testrand_impl.h noinst_HEADERS += src/hash.h diff --git a/include/secp256k1.h b/include/secp256k1.h index 3e9c098d19f6a..5c109f4852383 100644 --- a/include/secp256k1.h +++ b/include/secp256k1.h @@ -42,6 +42,19 @@ extern "C" { */ typedef struct secp256k1_context_struct secp256k1_context; +/** Opaque data structure that holds rewriteable "scratch space" + * + * The purpose of this structure is to replace dynamic memory allocations, + * because we target architectures where this may not be available. It is + * essentially a resizable (within specified parameters) block of bytes, + * which is initially created either by memory allocation or TODO as a pointer + * into some fixed rewritable space. + * + * Unlike the context object, this cannot safely be shared between threads + * without additional synchronization logic. + */ +typedef struct secp256k1_scratch_space_struct secp256k1_scratch_space; + /** Opaque data structure that holds a parsed and valid public key. * * The exact representation of data inside is implementation defined and not @@ -243,6 +256,28 @@ SECP256K1_API void secp256k1_context_set_error_callback( const void* data ) SECP256K1_ARG_NONNULL(1); +/** Create a secp256k1 scratch space object. + * + * Returns: a newly created scratch space. + * Args: ctx: an existing context object (cannot be NULL) + * In: init_size: initial amount of memory to allocate + * max_size: maximum amount of memory to allocate + */ +SECP256K1_API SECP256K1_WARN_UNUSED_RESULT secp256k1_scratch_space* secp256k1_scratch_space_create( + const secp256k1_context* ctx, + size_t init_size, + size_t max_size +) SECP256K1_ARG_NONNULL(1); + +/** Destroy a secp256k1 scratch space. + * + * The pointer may not be used afterwards. + * Args: scratch: space to destroy + */ +SECP256K1_API void secp256k1_scratch_space_destroy( + secp256k1_scratch_space* scratch +); + /** Parse a variable-length public key into the pubkey object. * * Returns: 1 if the public key was fully valid. diff --git a/src/scratch.h b/src/scratch.h new file mode 100644 index 0000000000000..aba56e215eb23 --- /dev/null +++ b/src/scratch.h @@ -0,0 +1,35 @@ +/********************************************************************** + * Copyright (c) 2017 Andrew Poelstra * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_SCRATCH_ +#define _SECP256K1_SCRATCH_ + +/* The typedef is used internally; the struct name is used in the public API + * (where it is exposed as a different typedef) */ +typedef struct secp256k1_scratch_space_struct { + void *data; + size_t offset; + size_t init_size; + size_t max_size; + const secp256k1_callback* error_callback; +} secp256k1_scratch; + +static secp256k1_scratch* secp256k1_scratch_create(const secp256k1_callback* error_callback, size_t init_size, size_t max_size); +static void secp256k1_scratch_destroy(secp256k1_scratch* scratch); + +/** Returns the maximum allocation the scratch space will allow */ +static size_t secp256k1_scratch_max_allocation(const secp256k1_scratch* scratch, size_t n_objects); + +/** Attempts to allocate so that there are `n` available bytes. Returns 1 on success, 0 on failure */ +static int secp256k1_scratch_resize(secp256k1_scratch* scratch, size_t n, size_t n_objects); + +/** Returns a pointer into the scratch space or NULL if there is insufficient available space */ +static void *secp256k1_scratch_alloc(secp256k1_scratch* scratch, size_t n); + +/** Resets the returned pointer to the beginning of space */ +static void secp256k1_scratch_reset(secp256k1_scratch* scratch); + +#endif diff --git a/src/scratch_impl.h b/src/scratch_impl.h new file mode 100644 index 0000000000000..9bd68fe100a31 --- /dev/null +++ b/src/scratch_impl.h @@ -0,0 +1,77 @@ +/********************************************************************** + * Copyright (c) 2017 Andrew Poelstra * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ + +#ifndef _SECP256K1_SCRATCH_IMPL_H_ +#define _SECP256K1_SCRATCH_IMPL_H_ + +#include "scratch.h" + +/* Using 16 bytes alignment because common architectures never have alignment + * requirements above 8 for any of the types we care about. In addition we + * leave some room because currently we don't care about a few bytes. + * TODO: Determine this at configure time. */ +#define ALIGNMENT 16 + +static secp256k1_scratch* secp256k1_scratch_create(const secp256k1_callback* error_callback, size_t init_size, size_t max_size) { + secp256k1_scratch* ret = (secp256k1_scratch*)checked_malloc(error_callback, sizeof(*ret)); + if (ret != NULL) { + ret->data = checked_malloc(error_callback, init_size); + if (ret->data == NULL) { + free (ret); + return NULL; + } + ret->offset = 0; + ret->init_size = init_size; + ret->max_size = max_size; + ret->error_callback = error_callback; + } + return ret; +} + +static void secp256k1_scratch_destroy(secp256k1_scratch* scratch) { + if (scratch != NULL) { + free(scratch->data); + free(scratch); + } +} + +static size_t secp256k1_scratch_max_allocation(const secp256k1_scratch* scratch, size_t objects) { + if (scratch->max_size <= objects * ALIGNMENT) { + return 0; + } + return scratch->max_size - objects * ALIGNMENT; +} + +static int secp256k1_scratch_resize(secp256k1_scratch* scratch, size_t n, size_t objects) { + n += objects * ALIGNMENT; + if (n > scratch->init_size && n <= scratch->max_size) { + void *tmp = checked_realloc(scratch->error_callback, scratch->data, n); + if (tmp == NULL) { + return 0; + } + scratch->init_size = n; + scratch->data = tmp; + } + return n <= scratch->max_size; +} + +static void *secp256k1_scratch_alloc(secp256k1_scratch* scratch, size_t size) { + void *ret; + size = ((size + ALIGNMENT - 1) / ALIGNMENT) * ALIGNMENT; + if (size + scratch->offset > scratch->init_size) { + return NULL; + } + ret = (void *) ((unsigned char *) scratch->data + scratch->offset); + memset(ret, 0, size); + scratch->offset += size; + return ret; +} + +static void secp256k1_scratch_reset(secp256k1_scratch* scratch) { + scratch->offset = 0; +} + +#endif diff --git a/src/secp256k1.c b/src/secp256k1.c index cecb1550be7bb..4a8bb841afa1a 100644 --- a/src/secp256k1.c +++ b/src/secp256k1.c @@ -17,6 +17,7 @@ #include "ecdsa_impl.h" #include "eckey_impl.h" #include "hash_impl.h" +#include "scratch_impl.h" #define ARG_CHECK(cond) do { \ if (EXPECT(!(cond), 0)) { \ @@ -114,6 +115,17 @@ void secp256k1_context_set_error_callback(secp256k1_context* ctx, void (*fun)(co ctx->error_callback.data = data; } +secp256k1_scratch_space* secp256k1_scratch_space_create(const secp256k1_context* ctx, size_t init_size, size_t max_size) { + VERIFY_CHECK(ctx != NULL); + ARG_CHECK(max_size >= init_size); + + return secp256k1_scratch_create(&ctx->error_callback, init_size, max_size); +} + +void secp256k1_scratch_space_destroy(secp256k1_scratch_space* scratch) { + secp256k1_scratch_destroy(scratch); +} + static int secp256k1_pubkey_load(const secp256k1_context* ctx, secp256k1_ge* ge, const secp256k1_pubkey* pubkey) { if (sizeof(secp256k1_ge_storage) == 64) { /* When the secp256k1_ge_storage type is exactly 64 byte, use its diff --git a/src/tests.c b/src/tests.c index f307b99d5af45..3bd4e2bc77ada 100644 --- a/src/tests.c +++ b/src/tests.c @@ -248,6 +248,41 @@ void run_context_tests(void) { secp256k1_context_destroy(NULL); } +void run_scratch_tests(void) { + int32_t ecount = 0; + secp256k1_context *none = secp256k1_context_create(SECP256K1_CONTEXT_NONE); + secp256k1_scratch_space *scratch; + + /* Test public API */ + secp256k1_context_set_illegal_callback(none, counting_illegal_callback_fn, &ecount); + scratch = secp256k1_scratch_space_create(none, 100, 10); + CHECK(scratch == NULL); + CHECK(ecount == 1); + + scratch = secp256k1_scratch_space_create(none, 100, 100); + CHECK(scratch != NULL); + CHECK(ecount == 1); + secp256k1_scratch_space_destroy(scratch); + + scratch = secp256k1_scratch_space_create(none, 100, 1000); + CHECK(scratch != NULL); + CHECK(ecount == 1); + + /* Test internal API */ + CHECK(secp256k1_scratch_max_allocation(scratch, 0) == 1000); + CHECK(secp256k1_scratch_max_allocation(scratch, 1) < 1000); + CHECK(secp256k1_scratch_resize(scratch, 50, 1) == 1); /* no-op */ + CHECK(secp256k1_scratch_resize(scratch, 200, 1) == 1); + CHECK(secp256k1_scratch_resize(scratch, 950, 1) == 1); + CHECK(secp256k1_scratch_resize(scratch, 1000, 1) == 0); + CHECK(secp256k1_scratch_resize(scratch, 2000, 1) == 0); + CHECK(secp256k1_scratch_max_allocation(scratch, 0) == 1000); + + /* cleanup */ + secp256k1_scratch_space_destroy(scratch); + secp256k1_context_destroy(none); +} + /***** HASH TESTS *****/ void run_sha256_tests(void) { @@ -4451,6 +4486,7 @@ int main(int argc, char **argv) { /* initialize */ run_context_tests(); + run_scratch_tests(); ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY); if (secp256k1_rand_bits(1)) { secp256k1_rand256(run32); diff --git a/src/util.h b/src/util.h index b0441d8e305a4..e0147500f9944 100644 --- a/src/util.h +++ b/src/util.h @@ -76,6 +76,14 @@ static SECP256K1_INLINE void *checked_malloc(const secp256k1_callback* cb, size_ return ret; } +static SECP256K1_INLINE void *checked_realloc(const secp256k1_callback* cb, void *ptr, size_t size) { + void *ret = realloc(ptr, size); + if (ret == NULL) { + secp256k1_callback_call(cb, "Out of memory"); + } + return ret; +} + /* Macro for restrict, when available and not in a VERIFY build. */ #if defined(SECP256K1_BUILD) && defined(VERIFY) # define SECP256K1_RESTRICT From 8c1c831bdb083dfa8b50fac16a0e17a7e1df4064 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 16 Aug 2017 14:45:27 -0700 Subject: [PATCH 2/9] Generalize Strauss to support multiple points API by Andrew Poelstra. --- src/ecmult.h | 9 +- src/ecmult_impl.h | 254 ++++++++++++++++++++++++++++++++++++---------- src/group.h | 3 + src/group_impl.h | 6 ++ 4 files changed, 216 insertions(+), 56 deletions(-) diff --git a/src/ecmult.h b/src/ecmult.h index 6d44aba60b53b..12e54630e2290 100644 --- a/src/ecmult.h +++ b/src/ecmult.h @@ -1,5 +1,5 @@ /********************************************************************** - * Copyright (c) 2013, 2014 Pieter Wuille * + * Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra * * Distributed under the MIT software license, see the accompanying * * file COPYING or http://www.opensource.org/licenses/mit-license.php.* **********************************************************************/ @@ -9,6 +9,8 @@ #include "num.h" #include "group.h" +#include "scalar.h" +#include "scratch.h" typedef struct { /* For accelerating the computation of a*P + b*G: */ @@ -28,4 +30,9 @@ static int secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context *ctx /** Double multiply: R = na*A + ng*G */ static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng); +typedef int (secp256k1_ecmult_multi_callback)(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *data); + +/** Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai. */ +static int secp256k1_ecmult_multi(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n); + #endif /* SECP256K1_ECMULT_H */ diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index 93d3794cb4348..44a27344ecee9 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -1,5 +1,5 @@ /********************************************************************** - * Copyright (c) 2013, 2014 Pieter Wuille * + * Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra * * Distributed under the MIT software license, see the accompanying * * file COPYING or http://www.opensource.org/licenses/mit-license.php.* **********************************************************************/ @@ -283,50 +283,78 @@ static int secp256k1_ecmult_wnaf(int *wnaf, int len, const secp256k1_scalar *a, return last_set_bit + 1; } -static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) { - secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)]; - secp256k1_ge tmpa; - secp256k1_fe Z; +struct secp256k1_strauss_point_state { #ifdef USE_ENDOMORPHISM - secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)]; secp256k1_scalar na_1, na_lam; - /* Splitted G factors. */ - secp256k1_scalar ng_1, ng_128; int wnaf_na_1[130]; int wnaf_na_lam[130]; int bits_na_1; int bits_na_lam; - int wnaf_ng_1[129]; - int bits_ng_1; - int wnaf_ng_128[129]; - int bits_ng_128; #else int wnaf_na[256]; int bits_na; +#endif + size_t input_pos; +}; + +struct secp256k1_strauss_state { + secp256k1_gej* prej; + secp256k1_fe* zr; + secp256k1_ge* pre_a; +#ifdef USE_ENDOMORPHISM + secp256k1_ge* pre_a_lam; +#endif + struct secp256k1_strauss_point_state* ps; +}; + +static void secp256k1_ecmult_strauss_wnaf(const secp256k1_ecmult_context *ctx, const struct secp256k1_strauss_state *state, secp256k1_gej *r, int num, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) { + secp256k1_ge tmpa; + secp256k1_fe Z; +#ifdef USE_ENDOMORPHISM + /* Splitted G factors. */ + secp256k1_scalar ng_1, ng_128; + int wnaf_ng_1[129]; + int bits_ng_1 = 0; + int wnaf_ng_128[129]; + int bits_ng_128 = 0; +#else int wnaf_ng[256]; - int bits_ng; + int bits_ng = 0; #endif int i; - int bits; + int bits = 0; + int np; + int no = 0; + for (np = 0; np < num; ++np) { + if (secp256k1_scalar_is_zero(&na[np]) || secp256k1_gej_is_infinity(&a[np])) { + continue; + } + state->ps[no].input_pos = np; #ifdef USE_ENDOMORPHISM - /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */ - secp256k1_scalar_split_lambda(&na_1, &na_lam, na); - - /* build wnaf representation for na_1 and na_lam. */ - bits_na_1 = secp256k1_ecmult_wnaf(wnaf_na_1, 130, &na_1, WINDOW_A); - bits_na_lam = secp256k1_ecmult_wnaf(wnaf_na_lam, 130, &na_lam, WINDOW_A); - VERIFY_CHECK(bits_na_1 <= 130); - VERIFY_CHECK(bits_na_lam <= 130); - bits = bits_na_1; - if (bits_na_lam > bits) { - bits = bits_na_lam; - } + /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */ + secp256k1_scalar_split_lambda(&state->ps[no].na_1, &state->ps[no].na_lam, &na[np]); + + /* build wnaf representation for na_1 and na_lam. */ + state->ps[no].bits_na_1 = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_1, 130, &state->ps[no].na_1, WINDOW_A); + state->ps[no].bits_na_lam = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_lam, 130, &state->ps[no].na_lam, WINDOW_A); + VERIFY_CHECK(state->ps[no].bits_na_1 <= 130); + VERIFY_CHECK(state->ps[no].bits_na_lam <= 130); + if (state->ps[no].bits_na_1 > bits) { + bits = state->ps[no].bits_na_1; + } + if (state->ps[no].bits_na_lam > bits) { + bits = state->ps[no].bits_na_lam; + } #else - /* build wnaf representation for na. */ - bits_na = secp256k1_ecmult_wnaf(wnaf_na, 256, na, WINDOW_A); - bits = bits_na; + /* build wnaf representation for na. */ + state->ps[no].bits_na = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na, 256, &na[np], WINDOW_A); + if (state->ps[no].bits_na > bits) { + bits = state->ps[no].bits_na; + } #endif + ++no; + } /* Calculate odd multiples of a. * All multiples are brought to the same Z 'denominator', which is stored @@ -338,29 +366,51 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej * of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same * isomorphism to efficiently add with a known Z inverse. */ - secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, a); + if (no > 0) { + /* Compute the odd multiples in Jacobian form. */ + secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej, state->zr, &a[state->ps[0].input_pos]); + for (np = 1; np < no; ++np) { + secp256k1_gej tmp = a[state->ps[np].input_pos]; +#ifdef VERIFY + secp256k1_fe_normalize_var(&(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z)); +#endif + secp256k1_gej_rescale(&tmp, &(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z)); + secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &tmp); + secp256k1_fe_mul(state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &(a[state->ps[np].input_pos].z)); + } + /* Bring them to the same Z denominator. */ + secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, &Z, state->prej, state->zr); + } else { + secp256k1_fe_set_int(&Z, 1); + } #ifdef USE_ENDOMORPHISM - for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) { - secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]); + for (np = 0; np < no; ++np) { + for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) { + secp256k1_ge_mul_lambda(&state->pre_a_lam[np * ECMULT_TABLE_SIZE(WINDOW_A) + i], &state->pre_a[np * ECMULT_TABLE_SIZE(WINDOW_A) + i]); + } } - /* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */ - secp256k1_scalar_split_128(&ng_1, &ng_128, ng); + if (ng) { + /* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */ + secp256k1_scalar_split_128(&ng_1, &ng_128, ng); - /* Build wnaf representation for ng_1 and ng_128 */ - bits_ng_1 = secp256k1_ecmult_wnaf(wnaf_ng_1, 129, &ng_1, WINDOW_G); - bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, 129, &ng_128, WINDOW_G); - if (bits_ng_1 > bits) { - bits = bits_ng_1; - } - if (bits_ng_128 > bits) { - bits = bits_ng_128; + /* Build wnaf representation for ng_1 and ng_128 */ + bits_ng_1 = secp256k1_ecmult_wnaf(wnaf_ng_1, 129, &ng_1, WINDOW_G); + bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, 129, &ng_128, WINDOW_G); + if (bits_ng_1 > bits) { + bits = bits_ng_1; + } + if (bits_ng_128 > bits) { + bits = bits_ng_128; + } } #else - bits_ng = secp256k1_ecmult_wnaf(wnaf_ng, 256, ng, WINDOW_G); - if (bits_ng > bits) { - bits = bits_ng; + if (ng) { + bits_ng = secp256k1_ecmult_wnaf(wnaf_ng, 256, ng, WINDOW_G); + if (bits_ng > bits) { + bits = bits_ng; + } } #endif @@ -370,13 +420,15 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej int n; secp256k1_gej_double_var(r, r, NULL); #ifdef USE_ENDOMORPHISM - if (i < bits_na_1 && (n = wnaf_na_1[i])) { - ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); - secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); - } - if (i < bits_na_lam && (n = wnaf_na_lam[i])) { - ECMULT_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A); - secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); + for (np = 0; np < no; ++np) { + if (i < state->ps[np].bits_na_1 && (n = state->ps[np].wnaf_na_1[i])) { + ECMULT_TABLE_GET_GE(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A); + secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); + } + if (i < state->ps[np].bits_na_lam && (n = state->ps[np].wnaf_na_lam[i])) { + ECMULT_TABLE_GET_GE(&tmpa, state->pre_a_lam + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A); + secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); + } } if (i < bits_ng_1 && (n = wnaf_ng_1[i])) { ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G); @@ -387,9 +439,11 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z); } #else - if (i < bits_na && (n = wnaf_na[i])) { - ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); - secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); + for (np = 0; np < no; ++np) { + if (i < state->ps[np].bits_na && (n = state->ps[np].wnaf_na[i])) { + ECMULT_TABLE_GET_GE(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A); + secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); + } } if (i < bits_ng && (n = wnaf_ng[i])) { ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G); @@ -403,4 +457,94 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej } } +static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) { + secp256k1_gej prej[ECMULT_TABLE_SIZE(WINDOW_A)]; + secp256k1_fe zr[ECMULT_TABLE_SIZE(WINDOW_A)]; + secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)]; + struct secp256k1_strauss_point_state ps[1]; +#ifdef USE_ENDOMORPHISM + secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)]; +#endif + struct secp256k1_strauss_state state; + + state.prej = prej; + state.zr = zr; + state.pre_a = pre_a; +#ifdef USE_ENDOMORPHISM + state.pre_a_lam = pre_a_lam; +#endif + state.ps = ps; + secp256k1_ecmult_strauss_wnaf(ctx, &state, r, 1, a, na, ng); +} + +static int secp256k1_ecmult_multi_split_strauss_wnaf(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { + secp256k1_gej* points; + secp256k1_scalar* scalars; + secp256k1_gej acc; + size_t in_pos = 0, out_pos = 0; + int first = 1; + +#ifdef USE_ENDOMORPHISM + static const size_t point_size = (sizeof(secp256k1_gej) + sizeof(secp256k1_fe) + sizeof(secp256k1_ge) * 2) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar); +#else + static const size_t point_size = (sizeof(secp256k1_gej) + sizeof(secp256k1_fe) + sizeof(secp256k1_ge)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar); +#endif + + size_t max_points = secp256k1_scratch_max_allocation(scratch, 6) / point_size; + size_t n_batches, points_per_batch; + struct secp256k1_strauss_state state; + + if (max_points == 0) return 0; + if (max_points > 160) max_points = 160; /* At this point, gains are not longer compensating for locality degradation */ + n_batches = (n + max_points - 1) / max_points; + points_per_batch = (n + n_batches - 1) / n_batches; + + /* Attempt to allocate sufficient space for Strauss */ + while (!secp256k1_scratch_resize(scratch, max_points * point_size, 6)) { + max_points /= 2; + if (max_points == 0) { + return 0; + } + } + + secp256k1_scratch_reset(scratch); + points = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, max_points * sizeof(secp256k1_gej)); + scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(scratch, max_points * sizeof(secp256k1_scalar)); + state.prej = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, max_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_gej)); + state.zr = (secp256k1_fe*)secp256k1_scratch_alloc(scratch, max_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe)); +#ifdef USE_ENDOMORPHISM + state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, max_points * 2 * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge)); + state.pre_a_lam = state.pre_a + max_points * ECMULT_TABLE_SIZE(WINDOW_A); +#else + state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, max_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge)); +#endif + state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(scratch, max_points * sizeof(struct secp256k1_strauss_point_state)); + + if (n == 0 && inp_g_sc) { + secp256k1_ecmult_strauss_wnaf(ctx, &state, r, 0, NULL, NULL, inp_g_sc); + return 1; + } + + while (in_pos < n) { + secp256k1_ge point; + if (!cb(&scalars[out_pos], &point, in_pos, cbdata)) return 0; + secp256k1_gej_set_ge(&points[out_pos], &point); + ++in_pos; + ++out_pos; + if (out_pos == points_per_batch || in_pos == n) { + secp256k1_ecmult_strauss_wnaf(ctx, &state, first ? r : &acc, out_pos, points, scalars, first ? inp_g_sc : NULL); + if (!first) { + secp256k1_gej_add_var(r, r, &acc, NULL); + } + first = 0; + out_pos = 0; + } + } + return 1; +} + +static int secp256k1_ecmult_multi(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { + return secp256k1_ecmult_multi_split_strauss_wnaf(ctx, scratch, r, inp_g_sc, cb, cbdata, n); +} + #endif /* SECP256K1_ECMULT_IMPL_H */ diff --git a/src/group.h b/src/group.h index ea1302deb8296..3947ea2ddafa3 100644 --- a/src/group.h +++ b/src/group.h @@ -79,6 +79,9 @@ static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej * stored in globalz. */ static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp256k1_fe *globalz, const secp256k1_gej *a, const secp256k1_fe *zr); +/** Set a group element (affine) equal to the point at infinity. */ +static void secp256k1_ge_set_infinity(secp256k1_ge *r); + /** Set a group element (jacobian) equal to the point at infinity. */ static void secp256k1_gej_set_infinity(secp256k1_gej *r); diff --git a/src/group_impl.h b/src/group_impl.h index b31b6c12efe33..b1ace87b6ffd0 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -200,6 +200,12 @@ static void secp256k1_gej_set_infinity(secp256k1_gej *r) { secp256k1_fe_clear(&r->z); } +static void secp256k1_ge_set_infinity(secp256k1_ge *r) { + r->infinity = 1; + secp256k1_fe_clear(&r->x); + secp256k1_fe_clear(&r->y); +} + static void secp256k1_gej_clear(secp256k1_gej *r) { r->infinity = 0; secp256k1_fe_clear(&r->x); From dba5471b6960a32ce067eff0365fc8931a931baa Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Wed, 16 Aug 2017 14:45:48 -0700 Subject: [PATCH 3/9] Add ecmult_multi tests --- src/tests.c | 238 +++++++++++++++++++++++++++++++++++++++++ src/tests_exhaustive.c | 41 +++++++ 2 files changed, 279 insertions(+) diff --git a/src/tests.c b/src/tests.c index 3bd4e2bc77ada..7a779ec9ea3ff 100644 --- a/src/tests.c +++ b/src/tests.c @@ -2522,6 +2522,243 @@ void run_ecmult_const_tests(void) { ecmult_const_chain_multiply(); } +typedef struct { + secp256k1_scalar *sc; + secp256k1_ge *pt; +} ecmult_multi_data; + +static int ecmult_multi_callback(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *cbdata) { + ecmult_multi_data *data = (ecmult_multi_data*) cbdata; + *sc = data->sc[idx]; + *pt = data->pt[idx]; + return 1; +} + +void run_ecmult_multi_tests(void) { + int ncount; + secp256k1_scalar szero; + secp256k1_scalar sc[32]; + secp256k1_ge pt[32]; + secp256k1_gej r; + secp256k1_gej r2; + ecmult_multi_data data; + secp256k1_scratch *scratch = secp256k1_scratch_create(&ctx->error_callback, 1024, 8192); + + data.sc = sc; + data.pt = pt; + + secp256k1_scalar_set_int(&szero, 0); + + /* Check 1- and 2-point multiplies against ecmult */ + for (ncount = 0; ncount < count; ncount++) { + secp256k1_ge ptg; + secp256k1_gej ptgj; + random_scalar_order(&sc[0]); + random_scalar_order(&sc[1]); + + random_group_element_test(&ptg); + secp256k1_gej_set_ge(&ptgj, &ptg); + pt[0] = ptg; + pt[1] = secp256k1_ge_const_g; + + /* 1-point */ + secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &ptgj, &sc[0], &szero); + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 1)); + secp256k1_gej_neg(&r2, &r2); + secp256k1_gej_add_var(&r, &r, &r2, NULL); + CHECK(secp256k1_gej_is_infinity(&r)); + + /* 2-point */ + secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &ptgj, &sc[0], &sc[1]); + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 2)); + secp256k1_gej_neg(&r2, &r2); + secp256k1_gej_add_var(&r, &r, &r2, NULL); + CHECK(secp256k1_gej_is_infinity(&r)); + + /* 2-point with G scalar */ + secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &ptgj, &sc[0], &sc[1]); + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &sc[1], ecmult_multi_callback, &data, 1)); + secp256k1_gej_neg(&r2, &r2); + secp256k1_gej_add_var(&r, &r, &r2, NULL); + CHECK(secp256k1_gej_is_infinity(&r)); + } + + /* Check infinite outputs of various forms */ + for (ncount = 0; ncount < count; ncount++) { + secp256k1_ge ptg; + size_t i, j; + size_t sizes[] = { 2, 10, 32 }; + + for (j = 0; j < 3; j++) { + for (i = 0; i < 32; i++) { + random_scalar_order(&sc[i]); + secp256k1_ge_set_infinity(&pt[i]); + } + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); + CHECK(secp256k1_gej_is_infinity(&r)); + } + + for (j = 0; j < 3; j++) { + for (i = 0; i < 32; i++) { + random_group_element_test(&ptg); + pt[i] = ptg; + secp256k1_scalar_set_int(&sc[i], 0); + } + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); + CHECK(secp256k1_gej_is_infinity(&r)); + } + + for (j = 0; j < 3; j++) { + random_group_element_test(&ptg); + for (i = 0; i < 16; i++) { + random_scalar_order(&sc[2*i]); + secp256k1_scalar_negate(&sc[2*i + 1], &sc[2*i]); + pt[2 * i] = ptg; + pt[2 * i + 1] = ptg; + } + + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); + CHECK(secp256k1_gej_is_infinity(&r)); + + random_scalar_order(&sc[0]); + for (i = 0; i < 16; i++) { + random_group_element_test(&ptg); + + sc[2*i] = sc[0]; + sc[2*i+1] = sc[0]; + pt[2 * i] = ptg; + secp256k1_ge_neg(&pt[2*i+1], &pt[2*i]); + } + + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); + CHECK(secp256k1_gej_is_infinity(&r)); + } + + random_group_element_test(&ptg); + secp256k1_scalar_set_int(&sc[0], 0); + pt[0] = ptg; + for (i = 1; i < 32; i++) { + pt[i] = ptg; + + random_scalar_order(&sc[i]); + secp256k1_scalar_add(&sc[0], &sc[0], &sc[i]); + secp256k1_scalar_negate(&sc[i], &sc[i]); + } + + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 32)); + CHECK(secp256k1_gej_is_infinity(&r)); + } + + /* Check random points, constant scalar */ + for (ncount = 0; ncount < count; ncount++) { + size_t i; + secp256k1_gej_set_infinity(&r); + + random_scalar_order(&sc[0]); + for (i = 0; i < 20; i++) { + secp256k1_ge ptg; + sc[i] = sc[0]; + random_group_element_test(&ptg); + pt[i] = ptg; + secp256k1_gej_add_ge_var(&r, &r, &pt[i], NULL); + } + + secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &r, &sc[0], &szero); + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20)); + secp256k1_gej_neg(&r2, &r2); + secp256k1_gej_add_var(&r, &r, &r2, NULL); + CHECK(secp256k1_gej_is_infinity(&r)); + } + + /* Check random scalars, constant point */ + for (ncount = 0; ncount < count; ncount++) { + size_t i; + secp256k1_ge ptg; + secp256k1_gej p0j; + secp256k1_scalar rs; + secp256k1_scalar_set_int(&rs, 0); + + random_group_element_test(&ptg); + for (i = 0; i < 20; i++) { + random_scalar_order(&sc[i]); + pt[i] = ptg; + secp256k1_scalar_add(&rs, &rs, &sc[i]); + } + + secp256k1_gej_set_ge(&p0j, &pt[0]); + secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &p0j, &rs, &szero); + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20)); + secp256k1_gej_neg(&r2, &r2); + secp256k1_gej_add_var(&r, &r, &r2, NULL); + CHECK(secp256k1_gej_is_infinity(&r)); + } + + /* Sanity check that zero scalars don't cause problems */ + secp256k1_scalar_clear(&sc[0]); + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20)); + secp256k1_scalar_clear(&sc[1]); + secp256k1_scalar_clear(&sc[2]); + secp256k1_scalar_clear(&sc[3]); + secp256k1_scalar_clear(&sc[4]); + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 6)); + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 5)); + CHECK(secp256k1_gej_is_infinity(&r)); + + /* Run through s0*(t0*P) + s1*(t1*P) exhaustively for many small values of s0, s1, t0, t1 */ + { + const size_t TOP = 8; + size_t s0i, s1i; + size_t t0i, t1i; + secp256k1_ge ptg; + secp256k1_gej ptgj; + + random_group_element_test(&ptg); + secp256k1_gej_set_ge(&ptgj, &ptg); + + for(t0i = 0; t0i < TOP; t0i++) { + for(t1i = 0; t1i < TOP; t1i++) { + secp256k1_gej t0p, t1p; + secp256k1_scalar t0, t1; + + secp256k1_scalar_set_int(&t0, (t0i + 1) / 2); + secp256k1_scalar_cond_negate(&t0, t0i & 1); + secp256k1_scalar_set_int(&t1, (t1i + 1) / 2); + secp256k1_scalar_cond_negate(&t1, t1i & 1); + + secp256k1_ecmult(&ctx->ecmult_ctx, &t0p, &ptgj, &t0, &szero); + secp256k1_ecmult(&ctx->ecmult_ctx, &t1p, &ptgj, &t1, &szero); + + for(s0i = 0; s0i < TOP; s0i++) { + for(s1i = 0; s1i < TOP; s1i++) { + secp256k1_scalar tmp1, tmp2; + secp256k1_gej expected, actual; + + secp256k1_ge_set_gej(&pt[0], &t0p); + secp256k1_ge_set_gej(&pt[1], &t1p); + + secp256k1_scalar_set_int(&sc[0], (s0i + 1) / 2); + secp256k1_scalar_cond_negate(&sc[0], s0i & 1); + secp256k1_scalar_set_int(&sc[1], (s1i + 1) / 2); + secp256k1_scalar_cond_negate(&sc[1], s1i & 1); + + secp256k1_scalar_mul(&tmp1, &t0, &sc[0]); + secp256k1_scalar_mul(&tmp2, &t1, &sc[1]); + secp256k1_scalar_add(&tmp1, &tmp1, &tmp2); + + secp256k1_ecmult(&ctx->ecmult_ctx, &expected, &ptgj, &tmp1, &szero); + CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &actual, &szero, ecmult_multi_callback, &data, 2)); + secp256k1_gej_neg(&expected, &expected); + secp256k1_gej_add_var(&actual, &actual, &expected, NULL); + CHECK(secp256k1_gej_is_infinity(&actual)); + } + } + } + } + } + + secp256k1_scratch_destroy(scratch); +} + void test_wnaf(const secp256k1_scalar *number, int w) { secp256k1_scalar x, two, t; int wnaf[256]; @@ -4528,6 +4765,7 @@ int main(int argc, char **argv) { run_ecmult_constants(); run_ecmult_gen_blind(); run_ecmult_const_tests(); + run_ecmult_multi_tests(); run_ec_combine(); /* endomorphism tests */ diff --git a/src/tests_exhaustive.c b/src/tests_exhaustive.c index b040bb0733ddf..c5e86ef8c835a 100644 --- a/src/tests_exhaustive.c +++ b/src/tests_exhaustive.c @@ -182,6 +182,46 @@ void test_exhaustive_ecmult(const secp256k1_context *ctx, const secp256k1_ge *gr } } +typedef struct { + secp256k1_scalar sc[2]; + secp256k1_ge pt[2]; +} ecmult_multi_data; + +static int ecmult_multi_callback(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *cbdata) { + ecmult_multi_data *data = (ecmult_multi_data*) cbdata; + *sc = data->sc[idx]; + *pt = data->pt[idx]; + return 1; +} + +void test_exhaustive_ecmult_multi(const secp256k1_context *ctx, const secp256k1_ge *group, int order) { + int i, j, k, x, y; + secp256k1_scratch *scratch = secp256k1_scratch_create(&ctx->error_callback, 1024, 4096); + for (i = 0; i < order; i++) { + for (j = 0; j < order; j++) { + for (k = 0; k < order; k++) { + for (x = 0; x < order; x++) { + for (y = 0; y < order; y++) { + secp256k1_gej tmp; + secp256k1_scalar g_sc; + ecmult_multi_data data; + + secp256k1_scalar_set_int(&data.sc[0], i); + secp256k1_scalar_set_int(&data.sc[1], j); + secp256k1_scalar_set_int(&g_sc, k); + data.pt[0] = group[x]; + data.pt[1] = group[y]; + + secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &tmp, &g_sc, ecmult_multi_callback, &data, 2); + ge_equals_gej(&group[(i * x + j * y + k) % order], &tmp); + } + } + } + } + } + secp256k1_scratch_destroy(scratch); +} + void r_from_k(secp256k1_scalar *r, const secp256k1_ge *group, int k) { secp256k1_fe x; unsigned char x_bin[32]; @@ -456,6 +496,7 @@ int main(void) { #endif test_exhaustive_addition(group, groupj, EXHAUSTIVE_TEST_ORDER); test_exhaustive_ecmult(ctx, group, groupj, EXHAUSTIVE_TEST_ORDER); + test_exhaustive_ecmult_multi(ctx, group, EXHAUSTIVE_TEST_ORDER); test_exhaustive_sign(ctx, group, EXHAUSTIVE_TEST_ORDER); test_exhaustive_verify(ctx, group, EXHAUSTIVE_TEST_ORDER); From bc65aa794e10ee5f0261821bd2f374843c747d5a Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 12 Sep 2017 20:05:39 -0700 Subject: [PATCH 4/9] Add bench_ecmult --- Makefile.am | 6 +- src/bench_ecmult.c | 181 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 src/bench_ecmult.c diff --git a/Makefile.am b/Makefile.am index 7603cc6261361..01fd0cd6de39e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -81,7 +81,7 @@ libsecp256k1_jni_la_CPPFLAGS = -DSECP256K1_BUILD $(JNI_INCLUDES) noinst_PROGRAMS = if USE_BENCHMARK -noinst_PROGRAMS += bench_verify bench_sign bench_internal +noinst_PROGRAMS += bench_verify bench_sign bench_internal bench_ecmult bench_verify_SOURCES = src/bench_verify.c bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS) $(SECP_TEST_LIBS) $(COMMON_LIB) bench_sign_SOURCES = src/bench_sign.c @@ -89,6 +89,9 @@ bench_sign_LDADD = libsecp256k1.la $(SECP_LIBS) $(SECP_TEST_LIBS) $(COMMON_LIB) bench_internal_SOURCES = src/bench_internal.c bench_internal_LDADD = $(SECP_LIBS) $(COMMON_LIB) bench_internal_CPPFLAGS = -DSECP256K1_BUILD $(SECP_INCLUDES) +bench_ecmult_SOURCES = src/bench_ecmult.c +bench_ecmult_LDADD = $(SECP_LIBS) $(COMMON_LIB) +bench_ecmult_CPPFLAGS = -DSECP256K1_BUILD $(SECP_INCLUDES) endif TESTS = @@ -161,6 +164,7 @@ $(gen_context_BIN): $(gen_context_OBJECTS) $(libsecp256k1_la_OBJECTS): src/ecmult_static_context.h $(tests_OBJECTS): src/ecmult_static_context.h $(bench_internal_OBJECTS): src/ecmult_static_context.h +$(bench_ecmult_OBJECTS): src/ecmult_static_context.h src/ecmult_static_context.h: $(gen_context_BIN) ./$(gen_context_BIN) diff --git a/src/bench_ecmult.c b/src/bench_ecmult.c new file mode 100644 index 0000000000000..b19819b26b854 --- /dev/null +++ b/src/bench_ecmult.c @@ -0,0 +1,181 @@ +/********************************************************************** + * Copyright (c) 2017 Pieter Wuille * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php.* + **********************************************************************/ +#include + +#include "include/secp256k1.h" + +#include "util.h" +#include "hash_impl.h" +#include "num_impl.h" +#include "field_impl.h" +#include "group_impl.h" +#include "scalar_impl.h" +#include "ecmult_impl.h" +#include "bench.h" +#include "secp256k1.c" + +#define POINTS 32768 +#define ITERS 10000 + +typedef struct { + /* Setup once in advance */ + secp256k1_context* ctx; + secp256k1_scratch_space* scratch; + secp256k1_scalar* scalars; + secp256k1_ge* pubkeys; + secp256k1_scalar* seckeys; + secp256k1_gej* expected_output; + + /* Changes per test */ + size_t count; + int includes_g; + + /* Changes per test iteration */ + size_t offset1; + size_t offset2; + + /* Test output. */ + secp256k1_gej* output; +} bench_data; + +static int bench_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) { + bench_data* data = (bench_data*)arg; + if (data->includes_g) ++idx; + if (idx == 0) { + *sc = data->scalars[data->offset1]; + *ge = secp256k1_ge_const_g; + } else { + *sc = data->scalars[(data->offset1 + idx) % POINTS]; + *ge = data->pubkeys[(data->offset2 + idx - 1) % POINTS]; + } + return 1; +} + +static void bench_ecmult(void* arg) { + bench_data* data = (bench_data*)arg; + + size_t count = data->count; + int includes_g = data->includes_g; + size_t iters = 1 + ITERS / count; + size_t iter; + + for (iter = 0; iter < iters; ++iter) { + secp256k1_ecmult_multi(&data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_callback, arg, count - includes_g); + data->offset1 = (data->offset1 + count) % POINTS; + data->offset2 = (data->offset2 + count - 1) % POINTS; + } +} + +static void bench_ecmult_setup(void* arg) { + bench_data* data = (bench_data*)arg; + data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS; + data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS; +} + +static void bench_ecmult_teardown(void* arg) { + bench_data* data = (bench_data*)arg; + size_t iters = 1 + ITERS / data->count; + size_t iter; + /* Verify the results in teardown, to avoid doing comparisons while benchmarking. */ + for (iter = 0; iter < iters; ++iter) { + secp256k1_gej tmp; + secp256k1_gej_add_var(&tmp, &data->output[iter], &data->expected_output[iter], NULL); + CHECK(secp256k1_gej_is_infinity(&tmp)); + } +} + +static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) { + secp256k1_sha256 sha256; + unsigned char c[11] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0}; + unsigned char buf[32]; + int overflow = 0; + c[6] = num; + c[7] = num >> 8; + c[8] = num >> 16; + c[9] = num >> 24; + secp256k1_sha256_initialize(&sha256); + secp256k1_sha256_write(&sha256, c, sizeof(c)); + secp256k1_sha256_finalize(&sha256, buf); + secp256k1_scalar_set_b32(scalar, buf, &overflow); + CHECK(!overflow); +} + +static void run_test(bench_data* data, size_t count, int includes_g) { + char str[32]; + static const secp256k1_scalar zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0); + size_t iters = 1 + ITERS / count; + size_t iter; + + data->count = count; + data->includes_g = includes_g; + + /* Compute (the negation of) the expected results directly. */ + data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS; + data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS; + for (iter = 0; iter < iters; ++iter) { + secp256k1_scalar tmp; + secp256k1_scalar total = data->scalars[(data->offset1++) % POINTS]; + size_t i = 0; + for (i = 0; i + 1 < count; ++i) { + secp256k1_scalar_mul(&tmp, &data->seckeys[(data->offset2++) % POINTS], &data->scalars[(data->offset1++) % POINTS]); + secp256k1_scalar_add(&total, &total, &tmp); + } + secp256k1_scalar_negate(&total, &total); + secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->expected_output[iter], NULL, &zero, &total); + } + + /* Run the benchmark. */ + sprintf(str, includes_g ? "ecmult_%ig" : "ecmult_%i", (int)count); + run_benchmark(str, bench_ecmult, bench_ecmult_setup, bench_ecmult_teardown, data, 10, count * (1 + ITERS / count)); +} + +int main(int argc, char **argv) { + bench_data data; + int i, p; + secp256k1_gej* pubkeys_gej; + + /* Allocate stuff */ + data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY); + data.scratch = secp256k1_scratch_space_create(data.ctx, POINTS * 1024, POINTS * 5 * 1024); + data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS); + data.seckeys = malloc(sizeof(secp256k1_scalar) * POINTS); + data.pubkeys = malloc(sizeof(secp256k1_ge) * POINTS); + data.expected_output = malloc(sizeof(secp256k1_gej) * (ITERS + 1)); + data.output = malloc(sizeof(secp256k1_gej) * (ITERS + 1)); + + /* Generate a set of scalars, and private/public keypairs. */ + pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS); + secp256k1_gej_set_ge(&pubkeys_gej[0], &secp256k1_ge_const_g); + secp256k1_scalar_set_int(&data.seckeys[0], 1); + for (i = 0; i < POINTS; ++i) { + generate_scalar(i, &data.scalars[i]); + if (i) { + secp256k1_gej_double_var(&pubkeys_gej[i], &pubkeys_gej[i - 1], NULL); + secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]); + } + } + secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS, &data.ctx->error_callback); + free(pubkeys_gej); + + for (i = 1; i <= 8; ++i) { + run_test(&data, i, 1); + } + + for (p = 0; p <= 11; ++p) { + for (i = 9; i <= 16; ++i) { + run_test(&data, i << p, 1); + } + } + secp256k1_context_destroy(data.ctx); + secp256k1_scratch_space_destroy(data.scratch); + free(data.scalars); + free(data.pubkeys); + free(data.seckeys); + free(data.output); + free(data.expected_output); + + return(0); +} From 355a38f113925799f38e8b3dbe1e66db1c7e4cfa Mon Sep 17 00:00:00 2001 From: Jonas Nick Date: Thu, 14 Sep 2017 17:55:13 +0200 Subject: [PATCH 5/9] Add pippenger_wnaf ecmult_multi --- src/bench_ecmult.c | 2 +- src/ecmult.h | 9 +- src/ecmult_const_impl.h | 7 - src/ecmult_impl.h | 454 +++++++++++++++++++++++++++++++++++----- src/tests.c | 214 +++++++++++++++++-- src/tests_exhaustive.c | 2 +- 6 files changed, 612 insertions(+), 76 deletions(-) diff --git a/src/bench_ecmult.c b/src/bench_ecmult.c index b19819b26b854..c1b032c5253c8 100644 --- a/src/bench_ecmult.c +++ b/src/bench_ecmult.c @@ -63,7 +63,7 @@ static void bench_ecmult(void* arg) { size_t iter; for (iter = 0; iter < iters; ++iter) { - secp256k1_ecmult_multi(&data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_callback, arg, count - includes_g); + secp256k1_ecmult_multi_var(&data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_callback, arg, count - includes_g); data->offset1 = (data->offset1 + count) % POINTS; data->offset2 = (data->offset2 + count - 1) % POINTS; } diff --git a/src/ecmult.h b/src/ecmult.h index 12e54630e2290..3b52988e1fe18 100644 --- a/src/ecmult.h +++ b/src/ecmult.h @@ -32,7 +32,12 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej typedef int (secp256k1_ecmult_multi_callback)(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *data); -/** Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai. */ -static int secp256k1_ecmult_multi(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n); +/** + * Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai. + * Returns: 1 on success (including when inp_g_sc is NULL and n is 0) + * 0 if there is not enough scratch space for a single point or + * callback returns 0 + */ +static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n); #endif /* SECP256K1_ECMULT_H */ diff --git a/src/ecmult_const_impl.h b/src/ecmult_const_impl.h index 7d7a172b7b385..fae50020b710f 100644 --- a/src/ecmult_const_impl.h +++ b/src/ecmult_const_impl.h @@ -12,13 +12,6 @@ #include "ecmult_const.h" #include "ecmult_impl.h" -#ifdef USE_ENDOMORPHISM - #define WNAF_BITS 128 -#else - #define WNAF_BITS 256 -#endif -#define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w)) - /* This is like `ECMULT_TABLE_GET_GE` but is constant time */ #define ECMULT_CONST_TABLE_GET_GE(r,pre,n,w) do { \ int m; \ diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index 44a27344ecee9..ff4fffcbf9bcd 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -1,8 +1,8 @@ -/********************************************************************** - * Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra * - * Distributed under the MIT software license, see the accompanying * - * file COPYING or http://www.opensource.org/licenses/mit-license.php.* - **********************************************************************/ +/***************************************************************************** + * Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra, Jonas Nick * + * Distributed under the MIT software license, see the accompanying * + * file COPYING or http://www.opensource.org/licenses/mit-license.php. * + *****************************************************************************/ #ifndef SECP256K1_ECMULT_IMPL_H #define SECP256K1_ECMULT_IMPL_H @@ -41,9 +41,27 @@ #endif #endif +#ifdef USE_ENDOMORPHISM + #define WNAF_BITS 128 +#else + #define WNAF_BITS 256 +#endif +#define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w)) + /** The number of entries a table with precomputed multiples needs to have. */ #define ECMULT_TABLE_SIZE(w) (1 << ((w)-2)) +/* The number of objects allocated on the scratch space for ecmult_multi algorithms */ +#define PIPPENGER_SCRATCH_OBJECTS 6 +#define STRAUSS_SCRATCH_OBJECTS 6 + +/* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */ +#ifdef USE_ENDOMORPHISM + #define ECMULT_PIPPENGER_THRESHOLD 96 +#else + #define ECMULT_PIPPENGER_THRESHOLD 156 +#endif + /** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain * the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will * contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z. @@ -477,74 +495,414 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej secp256k1_ecmult_strauss_wnaf(ctx, &state, r, 1, a, na, ng); } -static int secp256k1_ecmult_multi_split_strauss_wnaf(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { +static size_t secp256k1_strauss_scratch_size(size_t n_points) { +#ifdef USE_ENDOMORPHISM + static const size_t point_size = (2 * sizeof(secp256k1_ge) + sizeof(secp256k1_gej) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar); +#else + static const size_t point_size = (sizeof(secp256k1_ge) + sizeof(secp256k1_gej) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar); +#endif + return n_points*point_size; +} + +static int secp256k1_ecmult_strauss_batch(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) { secp256k1_gej* points; secp256k1_scalar* scalars; - secp256k1_gej acc; - size_t in_pos = 0, out_pos = 0; - int first = 1; + struct secp256k1_strauss_state state; + size_t i; + + secp256k1_gej_set_infinity(r); + if (inp_g_sc == NULL && n_points == 0) { + return 1; + } + if (!secp256k1_scratch_resize(scratch, secp256k1_strauss_scratch_size(n_points), STRAUSS_SCRATCH_OBJECTS)) { + return 0; + } + secp256k1_scratch_reset(scratch); + points = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, n_points * sizeof(secp256k1_gej)); + scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(scratch, n_points * sizeof(secp256k1_scalar)); + state.prej = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_gej)); + state.zr = (secp256k1_fe*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe)); #ifdef USE_ENDOMORPHISM - static const size_t point_size = (sizeof(secp256k1_gej) + sizeof(secp256k1_fe) + sizeof(secp256k1_ge) * 2) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar); + state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, n_points * 2 * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge)); + state.pre_a_lam = state.pre_a + n_points * ECMULT_TABLE_SIZE(WINDOW_A); #else - static const size_t point_size = (sizeof(secp256k1_gej) + sizeof(secp256k1_fe) + sizeof(secp256k1_ge)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar); + state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge)); #endif + state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(scratch, n_points * sizeof(struct secp256k1_strauss_point_state)); - size_t max_points = secp256k1_scratch_max_allocation(scratch, 6) / point_size; - size_t n_batches, points_per_batch; - struct secp256k1_strauss_state state; + for (i = 0; i < n_points; i++) { + secp256k1_ge point; + if (!cb(&scalars[i], &point, i+cb_offset, cbdata)) return 0; + secp256k1_gej_set_ge(&points[i], &point); + } + secp256k1_ecmult_strauss_wnaf(ctx, &state, r, n_points, points, scalars, inp_g_sc); + return 1; +} - if (max_points == 0) return 0; - if (max_points > 160) max_points = 160; /* At this point, gains are not longer compensating for locality degradation */ - n_batches = (n + max_points - 1) / max_points; - points_per_batch = (n + n_batches - 1) / n_batches; +/* Wrapper for secp256k1_ecmult_multi_func interface */ +static int secp256k1_ecmult_strauss_batch_single(const secp256k1_ecmult_context *actx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { + return secp256k1_ecmult_strauss_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0); +} - /* Attempt to allocate sufficient space for Strauss */ - while (!secp256k1_scratch_resize(scratch, max_points * point_size, 6)) { - max_points /= 2; - if (max_points == 0) { - return 0; +/** Convert a number to WNAF notation. + * The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)+1) - return_val. + * It has the following guarantees: + * - each wnaf[i] is either 0 or an odd integer between -(1 << w) and (1 << w) + * - the number of words set is always WNAF_SIZE(w) + * - the returned skew is 0 without endomorphism, or 0 or 1 with endomorphism + */ +static int secp256k1_wnaf_fixed(int *wnaf, const secp256k1_scalar *s, int w) { + int sign = 0; + int skew = 0; + int pos = 1; +#ifndef USE_ENDOMORPHISM + secp256k1_scalar neg_s; +#endif + const secp256k1_scalar *work = s; + + if (secp256k1_scalar_is_zero(s)) { + while (pos * w < WNAF_BITS) { + wnaf[pos] = 0; + ++pos; } + return 0; } - secp256k1_scratch_reset(scratch); - points = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, max_points * sizeof(secp256k1_gej)); - scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(scratch, max_points * sizeof(secp256k1_scalar)); - state.prej = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, max_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_gej)); - state.zr = (secp256k1_fe*)secp256k1_scratch_alloc(scratch, max_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe)); + if (secp256k1_scalar_is_even(s)) { #ifdef USE_ENDOMORPHISM - state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, max_points * 2 * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge)); - state.pre_a_lam = state.pre_a + max_points * ECMULT_TABLE_SIZE(WINDOW_A); + skew = 1; #else - state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, max_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge)); + secp256k1_scalar_negate(&neg_s, s); + work = &neg_s; + sign = -1; #endif - state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(scratch, max_points * sizeof(struct secp256k1_strauss_point_state)); + } - if (n == 0 && inp_g_sc) { - secp256k1_ecmult_strauss_wnaf(ctx, &state, r, 0, NULL, NULL, inp_g_sc); + wnaf[0] = (secp256k1_scalar_get_bits_var(work, 0, w) + skew + sign) ^ sign; + + while (pos * w < WNAF_BITS) { + int now = w; + int val; + if (now + pos * w > WNAF_BITS) { + now = WNAF_BITS - pos * w; + } + val = secp256k1_scalar_get_bits_var(work, pos * w, now); + if ((val & 1) == 0) { + wnaf[pos - 1] -= ((1 << w) + sign) ^ sign; + wnaf[pos] = (val + 1 + sign) ^ sign; + } else { + wnaf[pos] = (val + sign) ^ sign; + } + ++pos; + } + VERIFY_CHECK(pos == WNAF_SIZE(w)); + + return skew; +} + +struct secp256k1_pippenger_point_state { + int skew_na; + size_t input_pos; +}; + +struct secp256k1_pippenger_state { + int *wnaf_na; + struct secp256k1_pippenger_point_state* ps; +}; + +/* + * pippenger_wnaf computes the result of a multi-point multiplication as + * follows: The scalars are brought into wnaf with n_wnaf elements each. Then + * for every i < n_wnaf, first each point is added to a "bucket" corresponding + * to the point's wnaf[i]. Second, the buckets are added together such that + * r += 1*bucket[0] + 3*bucket[1] + 5*bucket[2] + ... + */ +static int secp256k1_ecmult_pippenger_wnaf(secp256k1_gej *buckets, int bucket_window, struct secp256k1_pippenger_state *state, secp256k1_gej *r, secp256k1_scalar *sc, secp256k1_ge *pt, size_t num) { + size_t n_wnaf = WNAF_SIZE(bucket_window+1); + size_t np; + size_t no = 0; + int i; + int j; + + for (np = 0; np < num; ++np) { + if (secp256k1_scalar_is_zero(&sc[np]) || secp256k1_ge_is_infinity(&pt[np])) { + continue; + } + state->ps[no].input_pos = np; + state->ps[no].skew_na = secp256k1_wnaf_fixed(&state->wnaf_na[no*n_wnaf], &sc[np], bucket_window+1); + no++; + } + secp256k1_gej_set_infinity(r); + + if (no == 0) { return 1; } - while (in_pos < n) { - secp256k1_ge point; - if (!cb(&scalars[out_pos], &point, in_pos, cbdata)) return 0; - secp256k1_gej_set_ge(&points[out_pos], &point); - ++in_pos; - ++out_pos; - if (out_pos == points_per_batch || in_pos == n) { - secp256k1_ecmult_strauss_wnaf(ctx, &state, first ? r : &acc, out_pos, points, scalars, first ? inp_g_sc : NULL); - if (!first) { - secp256k1_gej_add_var(r, r, &acc, NULL); + for (i = n_wnaf - 1; i >= 0; i--) { + secp256k1_gej running_sum; + secp256k1_gej walking_sum; + + for(j = 0; j < ECMULT_TABLE_SIZE(bucket_window+2); j++) { + secp256k1_gej_set_infinity(&buckets[j]); + } + for(j = 0; j < bucket_window+1; j++) { + secp256k1_gej_double_var(r, r, NULL); + } + + for (np = 0; np < no; ++np) { + int n = state->wnaf_na[np*n_wnaf + i]; + struct secp256k1_pippenger_point_state point_state = state->ps[np]; + secp256k1_ge tmp; + int idx; + +#ifdef USE_ENDOMORPHISM + if (i == 0) { + /* correct for wnaf skew */ + int skew = point_state.skew_na; + if (skew) { + secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]); + secp256k1_gej_add_ge_var(&buckets[0], &buckets[0], &tmp, NULL); + } + } +#endif + if (n > 0) { + idx = (n - 1)/2; + secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &pt[point_state.input_pos], NULL); + } else if (n < 0) { + idx = -(n + 1)/2; + secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]); + secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &tmp, NULL); } - first = 0; - out_pos = 0; } + secp256k1_gej_set_infinity(&running_sum); + secp256k1_gej_set_infinity(&walking_sum); + /* Compute walking_sum as bucket[0] + 3*bucket[1] + 5*bucket[2] + ... + * by first setting + * running_sum = bucket[0] + bucket[1] + bucket[2] + ... + * walking_sum = bucket[0] + 2*bucket[1] + 3*bucket[2] + ... + * and then computing + * walking_sum = 2*walking_sum - running_sum + */ + for(j = ECMULT_TABLE_SIZE(bucket_window+2) - 1; j >= 0; j--) { + secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[j], NULL); + secp256k1_gej_add_var(&walking_sum, &walking_sum, &running_sum, NULL); + } + + secp256k1_gej_double_var(&walking_sum, &walking_sum, NULL); + secp256k1_gej_neg(&running_sum, &running_sum); + secp256k1_gej_add_var(&walking_sum, &walking_sum, &running_sum, NULL); + secp256k1_gej_add_var(r, r, &walking_sum, NULL); } return 1; } -static int secp256k1_ecmult_multi(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { - return secp256k1_ecmult_multi_split_strauss_wnaf(ctx, scratch, r, inp_g_sc, cb, cbdata, n); +/** + * Returns optimal bucket_window (number of bits of a scalar represented by a + * set of buckets) for a given number of points. + */ +static int secp256k1_pippenger_bucket_window(size_t n) { +#ifdef USE_ENDOMORPHISM + if (n <= 4) { + return 1; + } else if (n <= 8) { + return 2; + } else if (n <= 40) { + return 3; + } else if (n <= 117) { + return 4; + } else if (n <= 280) { + return 5; + } else if (n <= 480) { + return 6; + } else if (n <= 2560) { + return 7; + } else if (n <= 9200) { + return 9; + } else if (n <= 17400) { + return 10; + } else if (n <= 28600) { + return 11; + } else { + return 12; + } +#else + if (n <= 2) { + return 1; + } else if (n <= 9) { + return 2; + } else if (n <= 42) { + return 3; + } else if (n <= 100) { + return 4; + } else if (n <= 280) { + return 5; + } else if (n <= 610) { + return 6; + } else if (n <= 1920) { + return 7; + } else if (n <= 3400) { + return 8; + } else if (n <= 10240) { + return 9; + } else if (n <= 19000) { + return 10; + } else if (n <= 35000) { + return 11; + } else { + return 12; + } +#endif +} + +#ifdef USE_ENDOMORPHISM +SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, secp256k1_scalar *s2, secp256k1_ge *p1, secp256k1_ge *p2) { + secp256k1_scalar tmp = *s1; + secp256k1_scalar_split_lambda(s1, s2, &tmp); + secp256k1_ge_mul_lambda(p2, p1); + + if (secp256k1_scalar_is_high(s1)) { + secp256k1_scalar_negate(s1, s1); + secp256k1_ge_neg(p1, p1); + } + if (secp256k1_scalar_is_high(s2)) { + secp256k1_scalar_negate(s2, s2); + secp256k1_ge_neg(p2, p2); + } +} +#endif + +/** + * Returns the scratch size required for a given number of points (excluding + * base point G) without considering alignment. + */ +static size_t secp256k1_pippenger_scratch_size(size_t n_points, int bucket_window) { +#ifdef USE_ENDOMORPHISM + size_t entries = 2*n_points + 2; +#else + size_t entries = n_points + 1; +#endif + size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int); + return ((1<ps = (struct secp256k1_pippenger_point_state *) secp256k1_scratch_alloc(scratch, entries * sizeof(*state_space->ps)); + state_space->wnaf_na = (int *) secp256k1_scratch_alloc(scratch, entries*(WNAF_SIZE(bucket_window+1)) * sizeof(int)); + buckets = (secp256k1_gej *) secp256k1_scratch_alloc(scratch, (1<ps[i].skew_na = 0; + for(j = 0; j < WNAF_SIZE(bucket_window+1); j++) { + state_space->wnaf_na[i * WNAF_SIZE(bucket_window+1) + j] = 0; + } + } + for(i = 0; i < 1<error_callback, 1024, 8192); + secp256k1_scratch *scratch_empty; data.sc = sc; data.pt = pt; - secp256k1_scalar_set_int(&szero, 0); + secp256k1_scratch_reset(scratch); + + /* No points to multiply */ + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, NULL, ecmult_multi_callback, &data, 0)); /* Check 1- and 2-point multiplies against ecmult */ for (ncount = 0; ncount < count; ncount++) { @@ -2561,23 +2572,38 @@ void run_ecmult_multi_tests(void) { pt[0] = ptg; pt[1] = secp256k1_ge_const_g; + /* only G scalar */ + secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &ptgj, &szero, &sc[0]); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &sc[0], ecmult_multi_callback, &data, 0)); + secp256k1_gej_neg(&r2, &r2); + secp256k1_gej_add_var(&r, &r, &r2, NULL); + CHECK(secp256k1_gej_is_infinity(&r)); + /* 1-point */ secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &ptgj, &sc[0], &szero); - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 1)); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 1)); secp256k1_gej_neg(&r2, &r2); secp256k1_gej_add_var(&r, &r, &r2, NULL); CHECK(secp256k1_gej_is_infinity(&r)); + /* Try to multiply 1 point, but scratch space is empty */ + scratch_empty = secp256k1_scratch_create(&ctx->error_callback, 0, 0); + CHECK(!ecmult_multi(&ctx->ecmult_ctx, scratch_empty, &r, &szero, ecmult_multi_callback, &data, 1)); + secp256k1_scratch_destroy(scratch_empty); + + /* Try to multiply 1 point, but callback returns false */ + CHECK(!ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_false_callback, &data, 1)); + /* 2-point */ secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &ptgj, &sc[0], &sc[1]); - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 2)); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 2)); secp256k1_gej_neg(&r2, &r2); secp256k1_gej_add_var(&r, &r, &r2, NULL); CHECK(secp256k1_gej_is_infinity(&r)); /* 2-point with G scalar */ secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &ptgj, &sc[0], &sc[1]); - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &sc[1], ecmult_multi_callback, &data, 1)); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &sc[1], ecmult_multi_callback, &data, 1)); secp256k1_gej_neg(&r2, &r2); secp256k1_gej_add_var(&r, &r, &r2, NULL); CHECK(secp256k1_gej_is_infinity(&r)); @@ -2594,7 +2620,7 @@ void run_ecmult_multi_tests(void) { random_scalar_order(&sc[i]); secp256k1_ge_set_infinity(&pt[i]); } - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); CHECK(secp256k1_gej_is_infinity(&r)); } @@ -2604,7 +2630,7 @@ void run_ecmult_multi_tests(void) { pt[i] = ptg; secp256k1_scalar_set_int(&sc[i], 0); } - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); CHECK(secp256k1_gej_is_infinity(&r)); } @@ -2617,7 +2643,7 @@ void run_ecmult_multi_tests(void) { pt[2 * i + 1] = ptg; } - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); CHECK(secp256k1_gej_is_infinity(&r)); random_scalar_order(&sc[0]); @@ -2630,7 +2656,7 @@ void run_ecmult_multi_tests(void) { secp256k1_ge_neg(&pt[2*i+1], &pt[2*i]); } - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j])); CHECK(secp256k1_gej_is_infinity(&r)); } @@ -2645,7 +2671,7 @@ void run_ecmult_multi_tests(void) { secp256k1_scalar_negate(&sc[i], &sc[i]); } - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 32)); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 32)); CHECK(secp256k1_gej_is_infinity(&r)); } @@ -2664,7 +2690,7 @@ void run_ecmult_multi_tests(void) { } secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &r, &sc[0], &szero); - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20)); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20)); secp256k1_gej_neg(&r2, &r2); secp256k1_gej_add_var(&r, &r, &r2, NULL); CHECK(secp256k1_gej_is_infinity(&r)); @@ -2687,7 +2713,7 @@ void run_ecmult_multi_tests(void) { secp256k1_gej_set_ge(&p0j, &pt[0]); secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &p0j, &rs, &szero); - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20)); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20)); secp256k1_gej_neg(&r2, &r2); secp256k1_gej_add_var(&r, &r, &r2, NULL); CHECK(secp256k1_gej_is_infinity(&r)); @@ -2695,13 +2721,13 @@ void run_ecmult_multi_tests(void) { /* Sanity check that zero scalars don't cause problems */ secp256k1_scalar_clear(&sc[0]); - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20)); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20)); secp256k1_scalar_clear(&sc[1]); secp256k1_scalar_clear(&sc[2]); secp256k1_scalar_clear(&sc[3]); secp256k1_scalar_clear(&sc[4]); - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 6)); - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 5)); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 6)); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 5)); CHECK(secp256k1_gej_is_infinity(&r)); /* Run through s0*(t0*P) + s1*(t1*P) exhaustively for many small values of s0, s1, t0, t1 */ @@ -2746,7 +2772,7 @@ void run_ecmult_multi_tests(void) { secp256k1_scalar_add(&tmp1, &tmp1, &tmp2); secp256k1_ecmult(&ctx->ecmult_ctx, &expected, &ptgj, &tmp1, &szero); - CHECK(secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &actual, &szero, ecmult_multi_callback, &data, 2)); + CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &actual, &szero, ecmult_multi_callback, &data, 2)); secp256k1_gej_neg(&expected, &expected); secp256k1_gej_add_var(&actual, &actual, &expected, NULL); CHECK(secp256k1_gej_is_infinity(&actual)); @@ -2755,8 +2781,104 @@ void run_ecmult_multi_tests(void) { } } } +} + +void test_ecmult_multi_batching(void) { + static const int n_points = 3*MAX_BATCH_SIZE; + secp256k1_scalar scG; + secp256k1_scalar szero; + secp256k1_scalar *sc = (secp256k1_scalar *)checked_malloc(&ctx->error_callback, sizeof(secp256k1_scalar) * n_points); + secp256k1_ge *pt = (secp256k1_ge *)checked_malloc(&ctx->error_callback, sizeof(secp256k1_ge) * n_points); + secp256k1_gej r; + secp256k1_gej r2; + ecmult_multi_data data; + int i; + secp256k1_scratch *scratch; + + int test_n_points[] = { MAX_BATCH_SIZE, MAX_BATCH_SIZE + 1, MAX_BATCH_SIZE + 2, 2*MAX_BATCH_SIZE, 2*MAX_BATCH_SIZE+1, 3*MAX_BATCH_SIZE }; + secp256k1_gej_set_infinity(&r2); + secp256k1_scalar_set_int(&szero, 0); + + /* Get random scalars and group elements */ + random_scalar_order(&scG); + secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &r2, &szero, &scG); + for(i = 0; i < n_points; i++) { + secp256k1_ge ptg; + random_group_element_test(&ptg); + pt[i] = ptg; + random_scalar_order(&sc[i]); + } + data.sc = sc; + data.pt = pt; + + /* Test with empty scratch space */ + scratch = secp256k1_scratch_create(&ctx->error_callback, 0, 0); + CHECK(!secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, 1)); + secp256k1_scratch_destroy(scratch); + + /* Test with space for 1 point in pippenger. That's not enough because + * ecmult_multi selects strauss which requires more memory. */ + scratch = secp256k1_scratch_create(&ctx->error_callback, 0, secp256k1_pippenger_scratch_size(1, 1) + PIPPENGER_SCRATCH_OBJECTS*ALIGNMENT); + CHECK(!secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, 1)); + secp256k1_scratch_destroy(scratch); + + /* Run secp256k1_ecmult_multi_var with i points and a scratch space + * restricted to i points. */ + for(i = 1; i <= ECMULT_PIPPENGER_THRESHOLD+2; i++) { + secp256k1_gej ptgj; + if (i > ECMULT_PIPPENGER_THRESHOLD) { + int bucket_window = secp256k1_pippenger_bucket_window(i); + size_t scratch_size = secp256k1_pippenger_scratch_size(i, bucket_window); + scratch = secp256k1_scratch_create(&ctx->error_callback, 0, scratch_size + PIPPENGER_SCRATCH_OBJECTS*ALIGNMENT); + } else { + size_t scratch_size = secp256k1_strauss_scratch_size(i); + scratch = secp256k1_scratch_create(&ctx->error_callback, 0, scratch_size + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT); + } + CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, i)); + + /* compute running result */ + secp256k1_gej_set_ge(&ptgj, &pt[i-1]); + secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[i-1], NULL); + secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL); + + secp256k1_gej_neg(&r, &r); + secp256k1_gej_add_var(&r, &r, &r2, NULL); + CHECK(secp256k1_gej_is_infinity(&r)); + secp256k1_scratch_destroy(scratch); + } + + scratch = secp256k1_scratch_create(&ctx->error_callback, 0, secp256k1_strauss_scratch_size(n_points) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT); + + for(i = 0; i < (int)(sizeof(test_n_points) / sizeof(test_n_points[0])); i++) { + secp256k1_gej ptgj; + CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, test_n_points[i]-1)); + secp256k1_gej_set_infinity(&r2); + secp256k1_gej_add_var(&r2, &r2, &r, NULL); + CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, test_n_points[i])); + secp256k1_gej_set_ge(&ptgj, &pt[test_n_points[i]-1]); + secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[test_n_points[i]-1], NULL); + secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL); + + secp256k1_gej_neg(&r, &r); + secp256k1_gej_add_var(&r, &r, &r2, NULL); + CHECK(secp256k1_gej_is_infinity(&r)); + } secp256k1_scratch_destroy(scratch); + free(sc); + free(pt); +} + +void run_ecmult_multi_tests(void) { + secp256k1_scratch *scratch; + + scratch = secp256k1_scratch_create(&ctx->error_callback, 0, 819200); + test_ecmult_multi(scratch, &secp256k1_ecmult_multi_var); + test_ecmult_multi(scratch, &secp256k1_ecmult_pippenger_batch_single); + test_ecmult_multi(scratch, &secp256k1_ecmult_strauss_batch_single); + secp256k1_scratch_destroy(scratch); + + test_ecmult_multi_batching(); } void test_wnaf(const secp256k1_scalar *number, int w) { @@ -2847,6 +2969,61 @@ void test_constant_wnaf(const secp256k1_scalar *number, int w) { CHECK(secp256k1_scalar_eq(&x, &num)); } +void test_fixed_wnaf(const secp256k1_scalar *number, int w) { + secp256k1_scalar x, shift; + int wnaf[256] = {0}; + int i; + int skew; + secp256k1_scalar num = *number; + + secp256k1_scalar_set_int(&x, 0); + secp256k1_scalar_set_int(&shift, 1 << w); + /* With USE_ENDOMORPHISM on we only consider 128-bit numbers */ +#ifdef USE_ENDOMORPHISM + for (i = 0; i < 16; ++i) { + secp256k1_scalar_shr_int(&num, 8); + } +#endif + skew = secp256k1_wnaf_fixed(wnaf, &num, w); + + for (i = WNAF_SIZE(w)-1; i >= 0; --i) { + secp256k1_scalar t; + int v = wnaf[i]; + CHECK(v != 0); /* check nonzero */ + CHECK(v & 1); /* check parity */ + CHECK(v > -(1 << w)); /* check range above */ + CHECK(v < (1 << w)); /* check range below */ + + secp256k1_scalar_mul(&x, &x, &shift); + if (v >= 0) { + secp256k1_scalar_set_int(&t, v); + } else { + secp256k1_scalar_set_int(&t, -v); + secp256k1_scalar_negate(&t, &t); + } + secp256k1_scalar_add(&x, &x, &t); + } + /* If skew is 1 then add 1 to num */ + secp256k1_scalar_cadd_bit(&num, 0, skew == 1); + CHECK(secp256k1_scalar_eq(&x, &num)); +} + +void test_fixed_wnaf_zero(int w) { + int wnaf[256] = {0}; + int i; + int skew; + secp256k1_scalar num; + + secp256k1_scalar_set_int(&num, 0); + skew = secp256k1_wnaf_fixed(wnaf, &num, w); + + for (i = WNAF_SIZE(w)-1; i >= 0; --i) { + int v = wnaf[i]; + CHECK(v == 0); + } + CHECK(skew == 0); +} + void run_wnaf(void) { int i; secp256k1_scalar n = {{0}}; @@ -2857,12 +3034,15 @@ void run_wnaf(void) { test_constant_wnaf(&n, 4); n.d[0] = 2; test_constant_wnaf(&n, 4); + /* Test 0 */ + test_fixed_wnaf_zero(4); /* Random tests */ for (i = 0; i < count; i++) { random_scalar_order(&n); test_wnaf(&n, 4+(i%10)); test_constant_wnaf_negate(&n); test_constant_wnaf(&n, 4 + (i % 10)); + test_fixed_wnaf(&n, 4 + (i % 10)); } secp256k1_scalar_set_int(&n, 0); CHECK(secp256k1_scalar_cond_negate(&n, 1) == -1); diff --git a/src/tests_exhaustive.c b/src/tests_exhaustive.c index c5e86ef8c835a..141645eaeb827 100644 --- a/src/tests_exhaustive.c +++ b/src/tests_exhaustive.c @@ -212,7 +212,7 @@ void test_exhaustive_ecmult_multi(const secp256k1_context *ctx, const secp256k1_ data.pt[0] = group[x]; data.pt[1] = group[y]; - secp256k1_ecmult_multi(&ctx->ecmult_ctx, scratch, &tmp, &g_sc, ecmult_multi_callback, &data, 2); + secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &tmp, &g_sc, ecmult_multi_callback, &data, 2); ge_equals_gej(&group[(i * x + j * y + k) % order], &tmp); } } From 36b22c9337dbaa3db93d87832b6ff52273135031 Mon Sep 17 00:00:00 2001 From: Jonas Nick Date: Sun, 5 Nov 2017 19:40:18 +0000 Subject: [PATCH 6/9] Use scratch space dependent batching in ecmult_multi --- src/ecmult.h | 4 ++ src/ecmult_impl.h | 137 ++++++++++++++++++++++++++++++++++++++++------ src/tests.c | 104 ++++++++++++++++++++++------------- 3 files changed, 191 insertions(+), 54 deletions(-) diff --git a/src/ecmult.h b/src/ecmult.h index 3b52988e1fe18..ea1cd8a21f66b 100644 --- a/src/ecmult.h +++ b/src/ecmult.h @@ -34,6 +34,10 @@ typedef int (secp256k1_ecmult_multi_callback)(secp256k1_scalar *sc, secp256k1_ge /** * Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai. + * Chooses the right algorithm for a given number of points and scratch space + * size. Resets and overwrites the given scratch space. If the points do not + * fit in the scratch space the algorithm is repeatedly run with batches of + * points. * Returns: 1 on success (including when inp_g_sc is NULL and n is 0) * 0 if there is not enough scratch space for a single point or * callback returns 0 diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index ff4fffcbf9bcd..3749cf938a646 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -8,6 +8,7 @@ #define SECP256K1_ECMULT_IMPL_H #include +#include #include "group.h" #include "scalar.h" @@ -55,6 +56,8 @@ #define PIPPENGER_SCRATCH_OBJECTS 6 #define STRAUSS_SCRATCH_OBJECTS 6 +#define PIPPENGER_MAX_BUCKET_WINDOW 12 + /* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */ #ifdef USE_ENDOMORPHISM #define ECMULT_PIPPENGER_THRESHOLD 96 @@ -62,6 +65,12 @@ #define ECMULT_PIPPENGER_THRESHOLD 156 #endif +#ifdef USE_ENDOMORPHISM + #define ECMULT_MAX_POINTS_PER_BATCH 5000000 +#else + #define ECMULT_MAX_POINTS_PER_BATCH 10000000 +#endif + /** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain * the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will * contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z. @@ -545,6 +554,10 @@ static int secp256k1_ecmult_strauss_batch_single(const secp256k1_ecmult_context return secp256k1_ecmult_strauss_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0); } +static size_t secp256k1_strauss_max_points(secp256k1_scratch *scratch) { + return secp256k1_scratch_max_allocation(scratch, STRAUSS_SCRATCH_OBJECTS) / secp256k1_strauss_scratch_size(1); +} + /** Convert a number to WNAF notation. * The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)+1) - return_val. * It has the following guarantees: @@ -724,7 +737,7 @@ static int secp256k1_pippenger_bucket_window(size_t n) { } else if (n <= 28600) { return 11; } else { - return 12; + return PIPPENGER_MAX_BUCKET_WINDOW; } #else if (n <= 2) { @@ -750,11 +763,48 @@ static int secp256k1_pippenger_bucket_window(size_t n) { } else if (n <= 35000) { return 11; } else { - return 12; + return PIPPENGER_MAX_BUCKET_WINDOW; } #endif } +/** + * Returns the maximum optimal number of points for a bucket_window. + */ +static size_t secp256k1_pippenger_bucket_window_inv(int bucket_window) { + switch(bucket_window) { +#ifdef USE_ENDOMORPHISM + case 1: return 4; + case 2: return 8; + case 3: return 40; + case 4: return 117; + case 5: return 280; + case 6: return 480; + case 7: return 2560; + case 8: return 2560; + case 9: return 9200; + case 10: return 17400; + case 11: return 28600; + case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX; +#else + case 1: return 2; + case 2: return 9; + case 3: return 42; + case 4: return 100; + case 5: return 280; + case 6: return 610; + case 7: return 1920; + case 8: return 3400; + case 9: return 10240; + case 10: return 19000; + case 11: return 35000; + case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX; +#endif + } + return 0; +} + + #ifdef USE_ENDOMORPHISM SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, secp256k1_scalar *s2, secp256k1_ge *p1, secp256k1_ge *p2) { secp256k1_scalar tmp = *s1; @@ -865,11 +915,53 @@ static int secp256k1_ecmult_pippenger_batch_single(const secp256k1_ecmult_contex return secp256k1_ecmult_pippenger_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0); } -#define MAX_BATCH_SIZE 1024 +/** + * Returns the maximum number of points in addition to G that can be used with + * a given scratch space. The function ensures that fewer points may also be + * used. + */ +static size_t secp256k1_pippenger_max_points(secp256k1_scratch *scratch) { + size_t max_alloc = secp256k1_scratch_max_allocation(scratch, PIPPENGER_SCRATCH_OBJECTS); + int bucket_window; + size_t res = 0; + + for (bucket_window = 1; bucket_window <= PIPPENGER_MAX_BUCKET_WINDOW; bucket_window++) { + size_t n_points; + size_t max_points = secp256k1_pippenger_bucket_window_inv(bucket_window); + size_t space_for_points; + size_t space_overhead; + size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int); + +#ifdef USE_ENDOMORPHISM + entry_size = 2*entry_size; +#endif + space_overhead = ((1< max_alloc) { + break; + } + space_for_points = max_alloc - space_overhead; + + n_points = space_for_points/entry_size; + n_points = n_points > max_points ? max_points : n_points; + if (n_points > res) { + res = n_points; + } + if (n_points < max_points) { + /* A larger bucket_window may support even more points. But if we + * would choose that then the caller couldn't safely use any number + * smaller than what this function returns */ + break; + } + } + return res; +} + typedef int (*secp256k1_ecmult_multi_func)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t); static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { size_t i; + int (*f)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t, size_t); + size_t max_points; size_t n_batches; size_t n_batch_points; @@ -883,25 +975,36 @@ static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp2 return 1; } - if(n <= ECMULT_PIPPENGER_THRESHOLD) { - if(!secp256k1_ecmult_strauss_batch(ctx, scratch, r, inp_g_sc, cb, cbdata, n, 0)) { + max_points = secp256k1_pippenger_max_points(scratch); + if (max_points == 0) { + return 0; + } else if (max_points > ECMULT_MAX_POINTS_PER_BATCH) { + max_points = ECMULT_MAX_POINTS_PER_BATCH; + } + n_batches = (n+max_points-1)/max_points; + n_batch_points = (n+n_batches-1)/n_batches; + + if (n_batch_points >= ECMULT_PIPPENGER_THRESHOLD) { + f = secp256k1_ecmult_pippenger_batch; + } else { + max_points = secp256k1_strauss_max_points(scratch); + if (max_points == 0) { return 0; } - } else { - n_batches = (n+MAX_BATCH_SIZE-1)/MAX_BATCH_SIZE; + n_batches = (n+max_points-1)/max_points; n_batch_points = (n+n_batches-1)/n_batches; - for(i = 0; i < n_batches; i++) { - size_t nbp = n < n_batch_points ? n : n_batch_points; - size_t offset = n_batch_points*i; - secp256k1_gej tmp; - if(!secp256k1_ecmult_pippenger_batch(ctx, scratch, &tmp, i == 0 ? inp_g_sc : NULL, cb, cbdata, nbp, offset)) { - return 0; - } - secp256k1_gej_add_var(r, r, &tmp, NULL); - n -= nbp; + f = secp256k1_ecmult_strauss_batch; + } + for(i = 0; i < n_batches; i++) { + size_t nbp = n < n_batch_points ? n : n_batch_points; + size_t offset = n_batch_points*i; + secp256k1_gej tmp; + if (!f(ctx, scratch, &tmp, i == 0 ? inp_g_sc : NULL, cb, cbdata, nbp, offset)) { + return 0; } + secp256k1_gej_add_var(r, r, &tmp, NULL); + n -= nbp; } - return 1; } diff --git a/src/tests.c b/src/tests.c index 55929c5e2ff57..fcc6019e40b93 100644 --- a/src/tests.c +++ b/src/tests.c @@ -2783,8 +2783,56 @@ void test_ecmult_multi(secp256k1_scratch *scratch, secp256k1_ecmult_multi_func e } } +void test_secp256k1_pippenger_bucket_window_inv(void) { + int i; + + CHECK(secp256k1_pippenger_bucket_window_inv(0) == 0); + for(i = 1; i <= PIPPENGER_MAX_BUCKET_WINDOW; i++) { +#ifdef USE_ENDOMORPHISM + /* Bucket_window of 8 is not used with endo */ + if (i == 8) { + continue; + } +#endif + CHECK(secp256k1_pippenger_bucket_window(secp256k1_pippenger_bucket_window_inv(i)) == i); + if (i != PIPPENGER_MAX_BUCKET_WINDOW) { + CHECK(secp256k1_pippenger_bucket_window(secp256k1_pippenger_bucket_window_inv(i)+1) > i); + } + } +} + +/** + * Probabilistically test the function returning the maximum number of possible points + * for a given scratch space. + */ +void test_ecmult_multi_pippenger_max_points(void) { + size_t scratch_size = secp256k1_rand_int(256); + size_t max_size = secp256k1_pippenger_scratch_size(secp256k1_pippenger_bucket_window_inv(PIPPENGER_MAX_BUCKET_WINDOW-1)+512, 12); + secp256k1_scratch *scratch; + size_t n_points_supported; + int bucket_window = 0; + + for(; scratch_size < max_size; scratch_size+=256) { + scratch = secp256k1_scratch_create(&ctx->error_callback, 0, scratch_size); + CHECK(scratch != NULL); + n_points_supported = secp256k1_pippenger_max_points(scratch); + if (n_points_supported == 0) { + secp256k1_scratch_destroy(scratch); + continue; + } + bucket_window = secp256k1_pippenger_bucket_window(n_points_supported); + CHECK(secp256k1_scratch_resize(scratch, secp256k1_pippenger_scratch_size(n_points_supported, bucket_window), PIPPENGER_SCRATCH_OBJECTS)); + secp256k1_scratch_destroy(scratch); + } + CHECK(bucket_window == PIPPENGER_MAX_BUCKET_WINDOW); +} + +/** + * Run secp256k1_ecmult_multi_var with num points and a scratch space restricted to + * 1 <= i <= num points. + */ void test_ecmult_multi_batching(void) { - static const int n_points = 3*MAX_BATCH_SIZE; + static const int n_points = 2*ECMULT_PIPPENGER_THRESHOLD; secp256k1_scalar scG; secp256k1_scalar szero; secp256k1_scalar *sc = (secp256k1_scalar *)checked_malloc(&ctx->error_callback, sizeof(secp256k1_scalar) * n_points); @@ -2795,18 +2843,21 @@ void test_ecmult_multi_batching(void) { int i; secp256k1_scratch *scratch; - int test_n_points[] = { MAX_BATCH_SIZE, MAX_BATCH_SIZE + 1, MAX_BATCH_SIZE + 2, 2*MAX_BATCH_SIZE, 2*MAX_BATCH_SIZE+1, 3*MAX_BATCH_SIZE }; secp256k1_gej_set_infinity(&r2); secp256k1_scalar_set_int(&szero, 0); - /* Get random scalars and group elements */ + /* Get random scalars and group elements and compute result */ random_scalar_order(&scG); secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &r2, &szero, &scG); for(i = 0; i < n_points; i++) { secp256k1_ge ptg; + secp256k1_gej ptgj; random_group_element_test(&ptg); + secp256k1_gej_set_ge(&ptgj, &ptg); pt[i] = ptg; random_scalar_order(&sc[i]); + secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[i], NULL); + secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL); } data.sc = sc; data.pt = pt; @@ -2822,10 +2873,8 @@ void test_ecmult_multi_batching(void) { CHECK(!secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, 1)); secp256k1_scratch_destroy(scratch); - /* Run secp256k1_ecmult_multi_var with i points and a scratch space - * restricted to i points. */ - for(i = 1; i <= ECMULT_PIPPENGER_THRESHOLD+2; i++) { - secp256k1_gej ptgj; + secp256k1_gej_neg(&r2, &r2); + for(i = 1; i <= n_points; i++) { if (i > ECMULT_PIPPENGER_THRESHOLD) { int bucket_window = secp256k1_pippenger_bucket_window(i); size_t scratch_size = secp256k1_pippenger_scratch_size(i, bucket_window); @@ -2834,37 +2883,11 @@ void test_ecmult_multi_batching(void) { size_t scratch_size = secp256k1_strauss_scratch_size(i); scratch = secp256k1_scratch_create(&ctx->error_callback, 0, scratch_size + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT); } - CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, i)); - - /* compute running result */ - secp256k1_gej_set_ge(&ptgj, &pt[i-1]); - secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[i-1], NULL); - secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL); - - secp256k1_gej_neg(&r, &r); + CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, n_points)); secp256k1_gej_add_var(&r, &r, &r2, NULL); CHECK(secp256k1_gej_is_infinity(&r)); secp256k1_scratch_destroy(scratch); } - - scratch = secp256k1_scratch_create(&ctx->error_callback, 0, secp256k1_strauss_scratch_size(n_points) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT); - - for(i = 0; i < (int)(sizeof(test_n_points) / sizeof(test_n_points[0])); i++) { - secp256k1_gej ptgj; - CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, test_n_points[i]-1)); - secp256k1_gej_set_infinity(&r2); - secp256k1_gej_add_var(&r2, &r2, &r, NULL); - CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, test_n_points[i])); - secp256k1_gej_set_ge(&ptgj, &pt[test_n_points[i]-1]); - secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[test_n_points[i]-1], NULL); - secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL); - - secp256k1_gej_neg(&r, &r); - secp256k1_gej_add_var(&r, &r, &r2, NULL); - CHECK(secp256k1_gej_is_infinity(&r)); - } - - secp256k1_scratch_destroy(scratch); free(sc); free(pt); } @@ -2872,10 +2895,17 @@ void test_ecmult_multi_batching(void) { void run_ecmult_multi_tests(void) { secp256k1_scratch *scratch; + test_secp256k1_pippenger_bucket_window_inv(); + test_ecmult_multi_pippenger_max_points(); scratch = secp256k1_scratch_create(&ctx->error_callback, 0, 819200); - test_ecmult_multi(scratch, &secp256k1_ecmult_multi_var); - test_ecmult_multi(scratch, &secp256k1_ecmult_pippenger_batch_single); - test_ecmult_multi(scratch, &secp256k1_ecmult_strauss_batch_single); + test_ecmult_multi(scratch, secp256k1_ecmult_multi_var); + test_ecmult_multi(scratch, secp256k1_ecmult_pippenger_batch_single); + test_ecmult_multi(scratch, secp256k1_ecmult_strauss_batch_single); + secp256k1_scratch_destroy(scratch); + + /* Run test_ecmult_multi with space for exactly one point */ + scratch = secp256k1_scratch_create(&ctx->error_callback, 0, secp256k1_strauss_scratch_size(1) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT); + test_ecmult_multi(scratch, secp256k1_ecmult_multi_var); secp256k1_scratch_destroy(scratch); test_ecmult_multi_batching(); From a58f543f5ac10f8fca4a6e6c0022e170f92b96c9 Mon Sep 17 00:00:00 2001 From: Jonas Nick Date: Sun, 5 Nov 2017 20:17:11 +0000 Subject: [PATCH 7/9] Add flags for choosing algorithm in ecmult_multi benchmark --- src/bench.h | 16 ++++++++++++++++ src/bench_ecmult.c | 19 +++++++++++++++++-- src/bench_internal.c | 15 --------------- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/bench.h b/src/bench.h index d5ebe01301b81..5b59783f68a95 100644 --- a/src/bench.h +++ b/src/bench.h @@ -8,6 +8,7 @@ #define SECP256K1_BENCH_H #include +#include #include #include "sys/time.h" @@ -63,4 +64,19 @@ void run_benchmark(char *name, void (*benchmark)(void*), void (*setup)(void*), v printf("us\n"); } +int have_flag(int argc, char** argv, char *flag) { + char** argm = argv + argc; + argv++; + if (argv == argm) { + return 1; + } + while (argv != NULL && argv != argm) { + if (strcmp(*argv, flag) == 0) { + return 1; + } + argv++; + } + return 0; +} + #endif /* SECP256K1_BENCH_H */ diff --git a/src/bench_ecmult.c b/src/bench_ecmult.c index c1b032c5253c8..3a7bfe379c67d 100644 --- a/src/bench_ecmult.c +++ b/src/bench_ecmult.c @@ -28,6 +28,7 @@ typedef struct { secp256k1_ge* pubkeys; secp256k1_scalar* seckeys; secp256k1_gej* expected_output; + secp256k1_ecmult_multi_func ecmult_multi; /* Changes per test */ size_t count; @@ -63,7 +64,7 @@ static void bench_ecmult(void* arg) { size_t iter; for (iter = 0; iter < iters; ++iter) { - secp256k1_ecmult_multi_var(&data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_callback, arg, count - includes_g); + data->ecmult_multi(&data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_callback, arg, count - includes_g); data->offset1 = (data->offset1 + count) % POINTS; data->offset2 = (data->offset2 + count - 1) % POINTS; } @@ -136,10 +137,24 @@ int main(int argc, char **argv) { bench_data data; int i, p; secp256k1_gej* pubkeys_gej; + size_t scratch_size; + + if (argc > 1) { + if(have_flag(argc, argv, "pippenger_wnaf")) { + printf("Using pippenger_wnaf:\n"); + data.ecmult_multi = secp256k1_ecmult_pippenger_batch_single; + } else if(have_flag(argc, argv, "strauss_wnaf")) { + printf("Using strauss_wnaf:\n"); + data.ecmult_multi = secp256k1_ecmult_strauss_batch_single; + } + } else { + data.ecmult_multi = secp256k1_ecmult_multi_var; + } /* Allocate stuff */ data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY); - data.scratch = secp256k1_scratch_space_create(data.ctx, POINTS * 1024, POINTS * 5 * 1024); + scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*16; + data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size, scratch_size); data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS); data.seckeys = malloc(sizeof(secp256k1_scalar) * POINTS); data.pubkeys = malloc(sizeof(secp256k1_ge) * POINTS); diff --git a/src/bench_internal.c b/src/bench_internal.c index 9b30c50d0be59..996aa8b0f1346 100644 --- a/src/bench_internal.c +++ b/src/bench_internal.c @@ -324,21 +324,6 @@ void bench_num_jacobi(void* arg) { } #endif -int have_flag(int argc, char** argv, char *flag) { - char** argm = argv + argc; - argv++; - if (argv == argm) { - return 1; - } - while (argv != NULL && argv != argm) { - if (strcmp(*argv, flag) == 0) { - return 1; - } - argv++; - } - return 0; -} - int main(int argc, char **argv) { bench_inv data; if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, 2000000); From 4c950bbeafb4cbbeb5ffc742a29e916224fbcb4e Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Fri, 1 Dec 2017 02:52:19 +0700 Subject: [PATCH 8/9] Save some additions per window in _pippenger_wnaf --- src/ecmult_impl.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index 3749cf938a646..67fd4c6921a68 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -654,14 +654,10 @@ static int secp256k1_ecmult_pippenger_wnaf(secp256k1_gej *buckets, int bucket_wi for (i = n_wnaf - 1; i >= 0; i--) { secp256k1_gej running_sum; - secp256k1_gej walking_sum; for(j = 0; j < ECMULT_TABLE_SIZE(bucket_window+2); j++) { secp256k1_gej_set_infinity(&buckets[j]); } - for(j = 0; j < bucket_window+1; j++) { - secp256k1_gej_double_var(r, r, NULL); - } for (np = 0; np < no; ++np) { int n = state->wnaf_na[np*n_wnaf + i]; @@ -688,24 +684,28 @@ static int secp256k1_ecmult_pippenger_wnaf(secp256k1_gej *buckets, int bucket_wi secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &tmp, NULL); } } + + for(j = 0; j < bucket_window; j++) { + secp256k1_gej_double_var(r, r, NULL); + } + secp256k1_gej_set_infinity(&running_sum); - secp256k1_gej_set_infinity(&walking_sum); - /* Compute walking_sum as bucket[0] + 3*bucket[1] + 5*bucket[2] + ... - * by first setting + /* Accumulate the sum: bucket[0] + 3*bucket[1] + 5*bucket[2] + 7*bucket[3] + ... + * = bucket[0] + bucket[1] + bucket[2] + bucket[3] + ... + * + 2 * (bucket[1] + 2*bucket[2] + 3*bucket[3] + ...) + * using an intermediate running sum: * running_sum = bucket[0] + bucket[1] + bucket[2] + ... - * walking_sum = bucket[0] + 2*bucket[1] + 3*bucket[2] + ... - * and then computing - * walking_sum = 2*walking_sum - running_sum + * + * The doubling is done implicitly by deferring the final window doubling (of 'r'). */ - for(j = ECMULT_TABLE_SIZE(bucket_window+2) - 1; j >= 0; j--) { + for(j = ECMULT_TABLE_SIZE(bucket_window+2) - 1; j > 0; j--) { secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[j], NULL); - secp256k1_gej_add_var(&walking_sum, &walking_sum, &running_sum, NULL); + secp256k1_gej_add_var(r, r, &running_sum, NULL); } - secp256k1_gej_double_var(&walking_sum, &walking_sum, NULL); - secp256k1_gej_neg(&running_sum, &running_sum); - secp256k1_gej_add_var(&walking_sum, &walking_sum, &running_sum, NULL); - secp256k1_gej_add_var(r, r, &walking_sum, NULL); + secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[0], NULL); + secp256k1_gej_double_var(r, r, NULL); + secp256k1_gej_add_var(r, r, &running_sum, NULL); } return 1; } From d2f9c6b5dc8bd8fed20f046b18b368b1715d720a Mon Sep 17 00:00:00 2001 From: Jonas Nick Date: Wed, 6 Dec 2017 10:24:00 +0000 Subject: [PATCH 9/9] Use more precise pippenger bucket windows --- src/ecmult_impl.h | 82 +++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index 67fd4c6921a68..fd14bf1285784 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -60,9 +60,9 @@ /* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */ #ifdef USE_ENDOMORPHISM - #define ECMULT_PIPPENGER_THRESHOLD 96 + #define ECMULT_PIPPENGER_THRESHOLD 88 #else - #define ECMULT_PIPPENGER_THRESHOLD 156 + #define ECMULT_PIPPENGER_THRESHOLD 160 #endif #ifdef USE_ENDOMORPHISM @@ -716,51 +716,51 @@ static int secp256k1_ecmult_pippenger_wnaf(secp256k1_gej *buckets, int bucket_wi */ static int secp256k1_pippenger_bucket_window(size_t n) { #ifdef USE_ENDOMORPHISM - if (n <= 4) { + if (n <= 1) { return 1; - } else if (n <= 8) { + } else if (n <= 4) { return 2; - } else if (n <= 40) { + } else if (n <= 20) { return 3; - } else if (n <= 117) { + } else if (n <= 57) { return 4; - } else if (n <= 280) { + } else if (n <= 136) { return 5; - } else if (n <= 480) { + } else if (n <= 235) { return 6; - } else if (n <= 2560) { + } else if (n <= 1260) { return 7; - } else if (n <= 9200) { + } else if (n <= 4420) { return 9; - } else if (n <= 17400) { + } else if (n <= 7880) { return 10; - } else if (n <= 28600) { + } else if (n <= 16050) { return 11; } else { return PIPPENGER_MAX_BUCKET_WINDOW; } #else - if (n <= 2) { + if (n <= 1) { return 1; - } else if (n <= 9) { + } else if (n <= 11) { return 2; - } else if (n <= 42) { + } else if (n <= 45) { return 3; } else if (n <= 100) { return 4; - } else if (n <= 280) { + } else if (n <= 275) { return 5; - } else if (n <= 610) { + } else if (n <= 625) { return 6; - } else if (n <= 1920) { + } else if (n <= 1850) { return 7; } else if (n <= 3400) { return 8; - } else if (n <= 10240) { + } else if (n <= 9630) { return 9; - } else if (n <= 19000) { + } else if (n <= 17900) { return 10; - } else if (n <= 35000) { + } else if (n <= 32800) { return 11; } else { return PIPPENGER_MAX_BUCKET_WINDOW; @@ -774,30 +774,30 @@ static int secp256k1_pippenger_bucket_window(size_t n) { static size_t secp256k1_pippenger_bucket_window_inv(int bucket_window) { switch(bucket_window) { #ifdef USE_ENDOMORPHISM - case 1: return 4; - case 2: return 8; - case 3: return 40; - case 4: return 117; - case 5: return 280; - case 6: return 480; - case 7: return 2560; - case 8: return 2560; - case 9: return 9200; - case 10: return 17400; - case 11: return 28600; + case 1: return 1; + case 2: return 4; + case 3: return 20; + case 4: return 57; + case 5: return 136; + case 6: return 235; + case 7: return 1260; + case 8: return 1260; + case 9: return 4420; + case 10: return 7880; + case 11: return 16050; case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX; #else - case 1: return 2; - case 2: return 9; - case 3: return 42; + case 1: return 1; + case 2: return 11; + case 3: return 45; case 4: return 100; - case 5: return 280; - case 6: return 610; - case 7: return 1920; + case 5: return 275; + case 6: return 625; + case 7: return 1850; case 8: return 3400; - case 9: return 10240; - case 10: return 19000; - case 11: return 35000; + case 9: return 9630; + case 10: return 17900; + case 11: return 32800; case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX; #endif }