From 36b22c9337dbaa3db93d87832b6ff52273135031 Mon Sep 17 00:00:00 2001 From: Jonas Nick Date: Sun, 5 Nov 2017 19:40:18 +0000 Subject: [PATCH] Use scratch space dependent batching in ecmult_multi --- src/ecmult.h | 4 ++ src/ecmult_impl.h | 137 ++++++++++++++++++++++++++++++++++++++++------ src/tests.c | 104 ++++++++++++++++++++++------------- 3 files changed, 191 insertions(+), 54 deletions(-) diff --git a/src/ecmult.h b/src/ecmult.h index 3b52988e1fe18..ea1cd8a21f66b 100644 --- a/src/ecmult.h +++ b/src/ecmult.h @@ -34,6 +34,10 @@ typedef int (secp256k1_ecmult_multi_callback)(secp256k1_scalar *sc, secp256k1_ge /** * Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai. + * Chooses the right algorithm for a given number of points and scratch space + * size. Resets and overwrites the given scratch space. If the points do not + * fit in the scratch space the algorithm is repeatedly run with batches of + * points. * Returns: 1 on success (including when inp_g_sc is NULL and n is 0) * 0 if there is not enough scratch space for a single point or * callback returns 0 diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index ff4fffcbf9bcd..3749cf938a646 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -8,6 +8,7 @@ #define SECP256K1_ECMULT_IMPL_H #include <string.h> +#include <stdint.h> #include "group.h" #include "scalar.h" @@ -55,6 +56,8 @@ #define PIPPENGER_SCRATCH_OBJECTS 6 #define STRAUSS_SCRATCH_OBJECTS 6 +#define PIPPENGER_MAX_BUCKET_WINDOW 12 + /* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */ #ifdef USE_ENDOMORPHISM #define ECMULT_PIPPENGER_THRESHOLD 96 @@ -62,6 +65,12 @@ #define ECMULT_PIPPENGER_THRESHOLD 156 #endif +#ifdef USE_ENDOMORPHISM + #define ECMULT_MAX_POINTS_PER_BATCH 5000000 +#else + #define ECMULT_MAX_POINTS_PER_BATCH 10000000 +#endif + /** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain * the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will * contain prej[0].z / a.z. 
The other zr[i] values = prej[i].z / prej[i-1].z. @@ -545,6 +554,10 @@ static int secp256k1_ecmult_strauss_batch_single(const secp256k1_ecmult_context return secp256k1_ecmult_strauss_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0); } +static size_t secp256k1_strauss_max_points(secp256k1_scratch *scratch) { + return secp256k1_scratch_max_allocation(scratch, STRAUSS_SCRATCH_OBJECTS) / secp256k1_strauss_scratch_size(1); +} + /** Convert a number to WNAF notation. * The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)+1) - return_val. * It has the following guarantees: @@ -724,7 +737,7 @@ static int secp256k1_pippenger_bucket_window(size_t n) { } else if (n <= 28600) { return 11; } else { - return 12; + return PIPPENGER_MAX_BUCKET_WINDOW; } #else if (n <= 2) { @@ -750,11 +763,48 @@ static int secp256k1_pippenger_bucket_window(size_t n) { } else if (n <= 35000) { return 11; } else { - return 12; + return PIPPENGER_MAX_BUCKET_WINDOW; } #endif } +/** + * Returns the maximum optimal number of points for a bucket_window. 
+ */ +static size_t secp256k1_pippenger_bucket_window_inv(int bucket_window) { + switch(bucket_window) { +#ifdef USE_ENDOMORPHISM + case 1: return 4; + case 2: return 8; + case 3: return 40; + case 4: return 117; + case 5: return 280; + case 6: return 480; + case 7: return 2560; + case 8: return 2560; + case 9: return 9200; + case 10: return 17400; + case 11: return 28600; + case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX; +#else + case 1: return 2; + case 2: return 9; + case 3: return 42; + case 4: return 100; + case 5: return 280; + case 6: return 610; + case 7: return 1920; + case 8: return 3400; + case 9: return 10240; + case 10: return 19000; + case 11: return 35000; + case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX; +#endif + } + return 0; +} + + #ifdef USE_ENDOMORPHISM SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, secp256k1_scalar *s2, secp256k1_ge *p1, secp256k1_ge *p2) { secp256k1_scalar tmp = *s1; @@ -865,11 +915,53 @@ static int secp256k1_ecmult_pippenger_batch_single(const secp256k1_ecmult_contex return secp256k1_ecmult_pippenger_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0); } -#define MAX_BATCH_SIZE 1024 +/** + * Returns the maximum number of points in addition to G that can be used with + * a given scratch space. The function ensures that fewer points may also be + * used. 
+ */ +static size_t secp256k1_pippenger_max_points(secp256k1_scratch *scratch) { + size_t max_alloc = secp256k1_scratch_max_allocation(scratch, PIPPENGER_SCRATCH_OBJECTS); + int bucket_window; + size_t res = 0; + + for (bucket_window = 1; bucket_window <= PIPPENGER_MAX_BUCKET_WINDOW; bucket_window++) { + size_t n_points; + size_t max_points = secp256k1_pippenger_bucket_window_inv(bucket_window); + size_t space_for_points; + size_t space_overhead; + size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int); + +#ifdef USE_ENDOMORPHISM + entry_size = 2*entry_size; +#endif + space_overhead = ((1<<bucket_window) * sizeof(secp256k1_gej) + entry_size + sizeof(struct secp256k1_pippenger_state)); + if (space_overhead > max_alloc) { + break; + } + space_for_points = max_alloc - space_overhead; + + n_points = space_for_points/entry_size; + n_points = n_points > max_points ? max_points : n_points; + if (n_points > res) { + res = n_points; + } + if (n_points < max_points) { + /* A larger bucket_window may support even more points. 
But if we + * would choose that then the caller couldn't safely use any number + * smaller than what this function returns */ + break; + } + } + return res; +} + typedef int (*secp256k1_ecmult_multi_func)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t); static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { size_t i; + int (*f)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t, size_t); + size_t max_points; size_t n_batches; size_t n_batch_points; @@ -883,25 +975,36 @@ static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp2 return 1; } - if(n <= ECMULT_PIPPENGER_THRESHOLD) { - if(!secp256k1_ecmult_strauss_batch(ctx, scratch, r, inp_g_sc, cb, cbdata, n, 0)) { + max_points = secp256k1_pippenger_max_points(scratch); + if (max_points == 0) { + return 0; + } else if (max_points > ECMULT_MAX_POINTS_PER_BATCH) { + max_points = ECMULT_MAX_POINTS_PER_BATCH; + } + n_batches = (n+max_points-1)/max_points; + n_batch_points = (n+n_batches-1)/n_batches; + + if (n_batch_points >= ECMULT_PIPPENGER_THRESHOLD) { + f = secp256k1_ecmult_pippenger_batch; + } else { + max_points = secp256k1_strauss_max_points(scratch); + if (max_points == 0) { return 0; } - } else { - n_batches = (n+MAX_BATCH_SIZE-1)/MAX_BATCH_SIZE; + n_batches = (n+max_points-1)/max_points; n_batch_points = (n+n_batches-1)/n_batches; - for(i = 0; i < n_batches; i++) { - size_t nbp = n < n_batch_points ? n : n_batch_points; - size_t offset = n_batch_points*i; - secp256k1_gej tmp; - if(!secp256k1_ecmult_pippenger_batch(ctx, scratch, &tmp, i == 0 ? 
inp_g_sc : NULL, cb, cbdata, nbp, offset)) { - return 0; - } - secp256k1_gej_add_var(r, r, &tmp, NULL); - n -= nbp; + f = secp256k1_ecmult_strauss_batch; + } + for(i = 0; i < n_batches; i++) { + size_t nbp = n < n_batch_points ? n : n_batch_points; + size_t offset = n_batch_points*i; + secp256k1_gej tmp; + if (!f(ctx, scratch, &tmp, i == 0 ? inp_g_sc : NULL, cb, cbdata, nbp, offset)) { + return 0; } + secp256k1_gej_add_var(r, r, &tmp, NULL); + n -= nbp; } - return 1; } diff --git a/src/tests.c b/src/tests.c index 55929c5e2ff57..fcc6019e40b93 100644 --- a/src/tests.c +++ b/src/tests.c @@ -2783,8 +2783,56 @@ void test_ecmult_multi(secp256k1_scratch *scratch, secp256k1_ecmult_multi_func e } } +void test_secp256k1_pippenger_bucket_window_inv(void) { + int i; + + CHECK(secp256k1_pippenger_bucket_window_inv(0) == 0); + for(i = 1; i <= PIPPENGER_MAX_BUCKET_WINDOW; i++) { +#ifdef USE_ENDOMORPHISM + /* Bucket_window of 8 is not used with endo */ + if (i == 8) { + continue; + } +#endif + CHECK(secp256k1_pippenger_bucket_window(secp256k1_pippenger_bucket_window_inv(i)) == i); + if (i != PIPPENGER_MAX_BUCKET_WINDOW) { + CHECK(secp256k1_pippenger_bucket_window(secp256k1_pippenger_bucket_window_inv(i)+1) > i); + } + } +} + +/** + * Probabilistically test the function returning the maximum number of possible points + * for a given scratch space. 
+ */ +void test_ecmult_multi_pippenger_max_points(void) { + size_t scratch_size = secp256k1_rand_int(256); + size_t max_size = secp256k1_pippenger_scratch_size(secp256k1_pippenger_bucket_window_inv(PIPPENGER_MAX_BUCKET_WINDOW-1)+512, 12); + secp256k1_scratch *scratch; + size_t n_points_supported; + int bucket_window = 0; + + for(; scratch_size < max_size; scratch_size+=256) { + scratch = secp256k1_scratch_create(&ctx->error_callback, 0, scratch_size); + CHECK(scratch != NULL); + n_points_supported = secp256k1_pippenger_max_points(scratch); + if (n_points_supported == 0) { + secp256k1_scratch_destroy(scratch); + continue; + } + bucket_window = secp256k1_pippenger_bucket_window(n_points_supported); + CHECK(secp256k1_scratch_resize(scratch, secp256k1_pippenger_scratch_size(n_points_supported, bucket_window), PIPPENGER_SCRATCH_OBJECTS)); + secp256k1_scratch_destroy(scratch); + } + CHECK(bucket_window == PIPPENGER_MAX_BUCKET_WINDOW); +} + +/** + * Run secp256k1_ecmult_multi_var with num points and a scratch space restricted to + * 1 <= i <= num points. 
+ */ void test_ecmult_multi_batching(void) { - static const int n_points = 3*MAX_BATCH_SIZE; + static const int n_points = 2*ECMULT_PIPPENGER_THRESHOLD; secp256k1_scalar scG; secp256k1_scalar szero; secp256k1_scalar *sc = (secp256k1_scalar *)checked_malloc(&ctx->error_callback, sizeof(secp256k1_scalar) * n_points); @@ -2795,18 +2843,21 @@ void test_ecmult_multi_batching(void) { int i; secp256k1_scratch *scratch; - int test_n_points[] = { MAX_BATCH_SIZE, MAX_BATCH_SIZE + 1, MAX_BATCH_SIZE + 2, 2*MAX_BATCH_SIZE, 2*MAX_BATCH_SIZE+1, 3*MAX_BATCH_SIZE }; secp256k1_gej_set_infinity(&r2); secp256k1_scalar_set_int(&szero, 0); - /* Get random scalars and group elements */ + /* Get random scalars and group elements and compute result */ random_scalar_order(&scG); secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &r2, &szero, &scG); for(i = 0; i < n_points; i++) { secp256k1_ge ptg; + secp256k1_gej ptgj; random_group_element_test(&ptg); + secp256k1_gej_set_ge(&ptgj, &ptg); pt[i] = ptg; random_scalar_order(&sc[i]); + secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[i], NULL); + secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL); } data.sc = sc; data.pt = pt; @@ -2822,10 +2873,8 @@ void test_ecmult_multi_batching(void) { CHECK(!secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, 1)); secp256k1_scratch_destroy(scratch); - /* Run secp256k1_ecmult_multi_var with i points and a scratch space - * restricted to i points. 
*/ - for(i = 1; i <= ECMULT_PIPPENGER_THRESHOLD+2; i++) { - secp256k1_gej ptgj; + secp256k1_gej_neg(&r2, &r2); + for(i = 1; i <= n_points; i++) { if (i > ECMULT_PIPPENGER_THRESHOLD) { int bucket_window = secp256k1_pippenger_bucket_window(i); size_t scratch_size = secp256k1_pippenger_scratch_size(i, bucket_window); @@ -2834,37 +2883,11 @@ void test_ecmult_multi_batching(void) { size_t scratch_size = secp256k1_strauss_scratch_size(i); scratch = secp256k1_scratch_create(&ctx->error_callback, 0, scratch_size + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT); } - CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, i)); - - /* compute running result */ - secp256k1_gej_set_ge(&ptgj, &pt[i-1]); - secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[i-1], NULL); - secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL); - - secp256k1_gej_neg(&r, &r); + CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, n_points)); secp256k1_gej_add_var(&r, &r, &r2, NULL); CHECK(secp256k1_gej_is_infinity(&r)); secp256k1_scratch_destroy(scratch); } - - scratch = secp256k1_scratch_create(&ctx->error_callback, 0, secp256k1_strauss_scratch_size(n_points) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT); - - for(i = 0; i < (int)(sizeof(test_n_points) / sizeof(test_n_points[0])); i++) { - secp256k1_gej ptgj; - CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, test_n_points[i]-1)); - secp256k1_gej_set_infinity(&r2); - secp256k1_gej_add_var(&r2, &r2, &r, NULL); - CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, test_n_points[i])); - secp256k1_gej_set_ge(&ptgj, &pt[test_n_points[i]-1]); - secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[test_n_points[i]-1], NULL); - secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL); - - secp256k1_gej_neg(&r, &r); - secp256k1_gej_add_var(&r, &r, &r2, NULL); - CHECK(secp256k1_gej_is_infinity(&r)); - } - - 
secp256k1_scratch_destroy(scratch); free(sc); free(pt); } @@ -2872,10 +2895,17 @@ void test_ecmult_multi_batching(void) { void run_ecmult_multi_tests(void) { secp256k1_scratch *scratch; + test_secp256k1_pippenger_bucket_window_inv(); + test_ecmult_multi_pippenger_max_points(); scratch = secp256k1_scratch_create(&ctx->error_callback, 0, 819200); - test_ecmult_multi(scratch, &secp256k1_ecmult_multi_var); - test_ecmult_multi(scratch, &secp256k1_ecmult_pippenger_batch_single); - test_ecmult_multi(scratch, &secp256k1_ecmult_strauss_batch_single); + test_ecmult_multi(scratch, secp256k1_ecmult_multi_var); + test_ecmult_multi(scratch, secp256k1_ecmult_pippenger_batch_single); + test_ecmult_multi(scratch, secp256k1_ecmult_strauss_batch_single); + secp256k1_scratch_destroy(scratch); + + /* Run test_ecmult_multi with space for exactly one point */ + scratch = secp256k1_scratch_create(&ctx->error_callback, 0, secp256k1_strauss_scratch_size(1) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT); + test_ecmult_multi(scratch, secp256k1_ecmult_multi_var); secp256k1_scratch_destroy(scratch); test_ecmult_multi_batching();