diff --git a/src/bench_ecmult.c b/src/bench_ecmult.c index 68eff676edacc..1d463f92d0bee 100644 --- a/src/bench_ecmult.c +++ b/src/bench_ecmult.c @@ -18,29 +18,174 @@ #define POINTS 32768 +void help(char **argv) { + printf("Benchmark EC multiplication algorithms\n"); + printf("\n"); + printf("Usage: %s \n", argv[0]); + printf("The output shows the number of multiplied and summed points right after the\n"); + printf("function name. The letter 'g' indicates that one of the points is the generator.\n"); + printf("The benchmarks are divided by the number of points.\n"); + printf("\n"); + printf("default (ecmult_multi): picks pippenger_wnaf or strauss_wnaf depending on the\n"); + printf(" batch size\n"); + printf("pippenger_wnaf: for all batch sizes\n"); + printf("strauss_wnaf: for all batch sizes\n"); + printf("simple: multiply and sum each point individually\n"); +} + typedef struct { /* Setup once in advance */ secp256k1_context* ctx; secp256k1_scratch_space* scratch; secp256k1_scalar* scalars; secp256k1_ge* pubkeys; + secp256k1_gej* pubkeys_gej; secp256k1_scalar* seckeys; secp256k1_gej* expected_output; secp256k1_ecmult_multi_func ecmult_multi; - /* Changes per test */ + /* Changes per benchmark */ size_t count; int includes_g; - /* Changes per test iteration */ + /* Changes per benchmark iteration, used to pick different scalars and pubkeys + * in each run. */ size_t offset1; size_t offset2; - /* Test output. */ + /* Benchmark output. */ secp256k1_gej* output; } bench_data; -static int bench_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) { +/* Hashes x into [0, POINTS) twice and store the result in offset1 and offset2. */ +static void hash_into_offset(bench_data* data, size_t x) { + data->offset1 = (x * 0x537b7f6f + 0x8f66a481) % POINTS; + data->offset2 = (x * 0x7f6f537b + 0x6a1a8f49) % POINTS; +} + +/* Check correctness of the benchmark by computing + * sum(outputs) ?= (sum(scalars_gen) + sum(seckeys)*sum(scalars))*G */ +static void bench_ecmult_teardown_helper(bench_data* data, size_t* seckey_offset, size_t* scalar_offset, size_t* scalar_gen_offset, int iters) { + int i; + secp256k1_gej sum_output, tmp; + secp256k1_scalar sum_scalars; + + secp256k1_gej_set_infinity(&sum_output); + secp256k1_scalar_clear(&sum_scalars); + for (i = 0; i < iters; ++i) { + secp256k1_gej_add_var(&sum_output, &sum_output, &data->output[i], NULL); + if (scalar_gen_offset != NULL) { + secp256k1_scalar_add(&sum_scalars, &sum_scalars, &data->scalars[(*scalar_gen_offset+i) % POINTS]); + } + if (seckey_offset != NULL) { + secp256k1_scalar s = data->seckeys[(*seckey_offset+i) % POINTS]; + secp256k1_scalar_mul(&s, &s, &data->scalars[(*scalar_offset+i) % POINTS]); + secp256k1_scalar_add(&sum_scalars, &sum_scalars, &s); + } + } + secp256k1_ecmult_gen(&data->ctx->ecmult_gen_ctx, &tmp, &sum_scalars); + secp256k1_gej_neg(&tmp, &tmp); + secp256k1_gej_add_var(&tmp, &tmp, &sum_output, NULL); + CHECK(secp256k1_gej_is_infinity(&tmp)); +} + +static void bench_ecmult_setup(void* arg) { + bench_data* data = (bench_data*)arg; + /* Re-randomize offset to ensure that we're using different scalars and + * group elements in each run. */ + hash_into_offset(data, data->offset1); +} + +static void bench_ecmult_gen(void* arg, int iters) { + bench_data* data = (bench_data*)arg; + int i; + + for (i = 0; i < iters; ++i) { + secp256k1_ecmult_gen(&data->ctx->ecmult_gen_ctx, &data->output[i], &data->scalars[(data->offset1+i) % POINTS]); + } +} + +static void bench_ecmult_gen_teardown(void* arg, int iters) { + bench_data* data = (bench_data*)arg; + bench_ecmult_teardown_helper(data, NULL, NULL, &data->offset1, iters); +} + +static void bench_ecmult_const(void* arg, int iters) { + bench_data* data = (bench_data*)arg; + int i; + + for (i = 0; i < iters; ++i) { + secp256k1_ecmult_const(&data->output[i], &data->pubkeys[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], 256); + } +} + +static void bench_ecmult_const_teardown(void* arg, int iters) { + bench_data* data = (bench_data*)arg; + bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, NULL, iters); +} + +static void bench_ecmult_1(void* arg, int iters) { + bench_data* data = (bench_data*)arg; + int i; + + for (i = 0; i < iters; ++i) { + secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->output[i], &data->pubkeys_gej[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], NULL); + } +} + +static void bench_ecmult_1_teardown(void* arg, int iters) { + bench_data* data = (bench_data*)arg; + bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, NULL, iters); +} + +static void bench_ecmult_1g(void* arg, int iters) { + bench_data* data = (bench_data*)arg; + secp256k1_scalar zero; + int i; + + secp256k1_scalar_set_int(&zero, 0); + for (i = 0; i < iters; ++i) { + secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->output[i], NULL, &zero, &data->scalars[(data->offset1+i) % POINTS]); + } +} + +static void bench_ecmult_1g_teardown(void* arg, int iters) { + bench_data* data = (bench_data*)arg; + bench_ecmult_teardown_helper(data, NULL, NULL, &data->offset1, iters); +} + +static void bench_ecmult_2g(void* arg, int iters) { + bench_data* data = (bench_data*)arg; + int i; + + for (i = 0; i < iters/2; ++i) { + secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->output[i], &data->pubkeys_gej[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], &data->scalars[(data->offset1+i) % POINTS]); + } +} + +static void bench_ecmult_2g_teardown(void* arg, int iters) { + bench_data* data = (bench_data*)arg; + bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, &data->offset1, iters/2); +} + +static void run_ecmult_bench(bench_data* data, int iters) { + char str[32]; + sprintf(str, "ecmult_gen"); + run_benchmark(str, bench_ecmult_gen, bench_ecmult_setup, bench_ecmult_gen_teardown, data, 10, iters); + sprintf(str, "ecmult_const"); + run_benchmark(str, bench_ecmult_const, bench_ecmult_setup, bench_ecmult_const_teardown, data, 10, iters); + /* ecmult with non generator point */ + sprintf(str, "ecmult 1"); + run_benchmark(str, bench_ecmult_1, bench_ecmult_setup, bench_ecmult_1_teardown, data, 10, iters); + /* ecmult with generator point */ + sprintf(str, "ecmult 1g"); + run_benchmark(str, bench_ecmult_1g, bench_ecmult_setup, bench_ecmult_1g_teardown, data, 10, iters); + /* ecmult with generator and non-generator point. The reported time is per point. */ + sprintf(str, "ecmult 2g"); + run_benchmark(str, bench_ecmult_2g, bench_ecmult_setup, bench_ecmult_2g_teardown, data, 10, 2*iters); +} + +static int bench_ecmult_multi_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) { bench_data* data = (bench_data*)arg; if (data->includes_g) ++idx; if (idx == 0) { @@ -53,7 +198,7 @@ static int bench_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, vo return 1; } -static void bench_ecmult(void* arg, int iters) { +static void bench_ecmult_multi(void* arg, int iters) { bench_data* data = (bench_data*)arg; int includes_g = data->includes_g; @@ -62,19 +207,18 @@ static void bench_ecmult(void* arg, int iters) { iters = iters / data->count; for (iter = 0; iter < iters; ++iter) { - data->ecmult_multi(&data->ctx->error_callback, &data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_callback, arg, count - includes_g); + data->ecmult_multi(&data->ctx->error_callback, &data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_ecmult_multi_callback, arg, count - includes_g); data->offset1 = (data->offset1 + count) % POINTS; data->offset2 = (data->offset2 + count - 1) % POINTS; } } -static void bench_ecmult_setup(void* arg) { +static void bench_ecmult_multi_setup(void* arg) { bench_data* data = (bench_data*)arg; - data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS; - data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS; + hash_into_offset(data, data->count); } -static void bench_ecmult_teardown(void* arg, int iters) { +static void bench_ecmult_multi_teardown(void* arg, int iters) { bench_data* data = (bench_data*)arg; int iter; iters = iters / data->count; @@ -88,7 +232,7 @@ static void bench_ecmult_teardown(void* arg, int iters) { static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) { secp256k1_sha256 sha256; - unsigned char c[11] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0}; + unsigned char c[10] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0}; unsigned char buf[32]; int overflow = 0; c[6] = num; @@ -102,7 +246,7 @@ static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) { CHECK(!overflow); } -static void run_test(bench_data* data, size_t count, int includes_g, int num_iters) { +static void run_ecmult_multi_bench(bench_data* data, size_t count, int includes_g, int num_iters) { char str[32]; static const secp256k1_scalar zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0); size_t iters = 1 + num_iters / count; @@ -112,8 +256,7 @@ static void run_test(bench_data* data, size_t count, int includes_g, int num_ite data->includes_g = includes_g; /* Compute (the negation of) the expected results directly. */ - data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS; - data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS; + hash_into_offset(data, data->count); for (iter = 0; iter < iters; ++iter) { secp256k1_scalar tmp; secp256k1_scalar total = data->scalars[(data->offset1++) % POINTS]; @@ -127,25 +270,26 @@ static void run_test(bench_data* data, size_t count, int includes_g, int num_ite } /* Run the benchmark. */ - sprintf(str, includes_g ? "ecmult_%ig" : "ecmult_%i", (int)count); - run_benchmark(str, bench_ecmult, bench_ecmult_setup, bench_ecmult_teardown, data, 10, count * iters); + sprintf(str, includes_g ? "ecmult_multi %ig" : "ecmult_multi %i", (int)count); + run_benchmark(str, bench_ecmult_multi, bench_ecmult_multi_setup, bench_ecmult_multi_teardown, data, 10, count * iters); } int main(int argc, char **argv) { bench_data data; int i, p; - secp256k1_gej* pubkeys_gej; size_t scratch_size; int iters = get_iters(10000); - data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY); - scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*16; - data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size); data.ecmult_multi = secp256k1_ecmult_multi_var; if (argc > 1) { - if(have_flag(argc, argv, "pippenger_wnaf")) { + if(have_flag(argc, argv, "-h") + || have_flag(argc, argv, "--help") + || have_flag(argc, argv, "help")) { + help(argv); + return 1; + } else if(have_flag(argc, argv, "pippenger_wnaf")) { printf("Using pippenger_wnaf:\n"); data.ecmult_multi = secp256k1_ecmult_pippenger_batch_single; } else if(have_flag(argc, argv, "strauss_wnaf")) { @@ -153,39 +297,48 @@ int main(int argc, char **argv) { data.ecmult_multi = secp256k1_ecmult_strauss_batch_single; } else if(have_flag(argc, argv, "simple")) { printf("Using simple algorithm:\n"); - data.ecmult_multi = secp256k1_ecmult_multi_var; - secp256k1_scratch_space_destroy(data.ctx, data.scratch); - data.scratch = NULL; } else { - fprintf(stderr, "%s: unrecognized argument '%s'.\n", argv[0], argv[1]); - fprintf(stderr, "Use 'pippenger_wnaf', 'strauss_wnaf', 'simple' or no argument to benchmark a combined algorithm.\n"); + fprintf(stderr, "%s: unrecognized argument '%s'.\n\n", argv[0], argv[1]); + help(argv); return 1; } } + data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY); + scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*16; + if (!have_flag(argc, argv, "simple")) { + data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size); + } else { + data.scratch = NULL; + } + /* Allocate stuff */ data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS); data.seckeys = malloc(sizeof(secp256k1_scalar) * POINTS); data.pubkeys = malloc(sizeof(secp256k1_ge) * POINTS); + data.pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS); data.expected_output = malloc(sizeof(secp256k1_gej) * (iters + 1)); data.output = malloc(sizeof(secp256k1_gej) * (iters + 1)); /* Generate a set of scalars, and private/public keypairs. */ - pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS); - secp256k1_gej_set_ge(&pubkeys_gej[0], &secp256k1_ge_const_g); + secp256k1_gej_set_ge(&data.pubkeys_gej[0], &secp256k1_ge_const_g); secp256k1_scalar_set_int(&data.seckeys[0], 1); for (i = 0; i < POINTS; ++i) { generate_scalar(i, &data.scalars[i]); if (i) { - secp256k1_gej_double_var(&pubkeys_gej[i], &pubkeys_gej[i - 1], NULL); + secp256k1_gej_double_var(&data.pubkeys_gej[i], &data.pubkeys_gej[i - 1], NULL); secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]); } } - secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS); - free(pubkeys_gej); + secp256k1_ge_set_all_gej_var(data.pubkeys, data.pubkeys_gej, POINTS); + + + /* Initialize offset1 and offset2 */ + hash_into_offset(&data, 0); + run_ecmult_bench(&data, iters); for (i = 1; i <= 8; ++i) { - run_test(&data, i, 1, iters); + run_ecmult_multi_bench(&data, i, 1, iters); } /* This is disabled with low count of iterations because the loop runs 77 times even with iters=1 @@ -194,7 +347,7 @@ int main(int argc, char **argv) { if (iters > 2) { for (p = 0; p <= 11; ++p) { for (i = 9; i <= 16; ++i) { - run_test(&data, i << p, 1, iters); + run_ecmult_multi_bench(&data, i << p, 1, iters); } } } @@ -205,6 +358,7 @@ int main(int argc, char **argv) { secp256k1_context_destroy(data.ctx); free(data.scalars); free(data.pubkeys); + free(data.pubkeys_gej); free(data.seckeys); free(data.output); free(data.expected_output);