From 925f78d55e112cd00f1e2867886bdc751a5d6606 Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Mon, 6 Dec 2021 01:24:30 +0700 Subject: [PATCH 1/7] Add _fe_half and use in _gej_add_ge - Trades 1 _half for 3 _mul_int and 2 _normalize_weak - Updated formula and comments in _gej_add_ge - Added internal benchmark for _fe_half --- src/bench_internal.c | 10 ++++++ src/field.h | 5 +++ src/field_10x26_impl.h | 76 ++++++++++++++++++++++++++++++++++++++++++ src/field_5x52_impl.h | 65 ++++++++++++++++++++++++++++++++++++ src/group_impl.h | 36 +++++++++----------- 5 files changed, 172 insertions(+), 20 deletions(-) diff --git a/src/bench_internal.c b/src/bench_internal.c index aed82161273d9..3c145f306c591 100644 --- a/src/bench_internal.c +++ b/src/bench_internal.c @@ -140,6 +140,15 @@ void bench_scalar_inverse_var(void* arg, int iters) { CHECK(j <= iters); } +void bench_field_half(void* arg, int iters) { + int i; + bench_inv *data = (bench_inv*)arg; + + for (i = 0; i < iters; i++) { + secp256k1_fe_half(&data->fe[0]); + } +} + void bench_field_normalize(void* arg, int iters) { int i; bench_inv *data = (bench_inv*)arg; @@ -354,6 +363,7 @@ int main(int argc, char **argv) { if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, iters); if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse_var", bench_scalar_inverse_var, bench_setup, NULL, &data, 10, iters); + if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "half")) run_benchmark("field_half", bench_field_half, bench_setup, NULL, &data, 10, iters*100); if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize", bench_field_normalize, bench_setup, NULL, &data, 10, iters*100); if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize_weak", bench_field_normalize_weak, bench_setup, NULL, &data, 10, iters*100); if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "sqr")) run_benchmark("field_sqr", bench_field_sqr, bench_setup, NULL, &data, 10, iters*10); diff --git a/src/field.h b/src/field.h index 23d3e3ce9adf6..a52e056dd2e16 100644 --- a/src/field.h +++ b/src/field.h @@ -130,4 +130,9 @@ static void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_f /** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. Both *r and *a must be initialized.*/ static void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag); +/** Halves the value of a field element modulo the field prime. Constant-time. + * For an input magnitude 'm', the output magnitude is set to 'floor(m/2) + 1'. + * The output is not guaranteed to be normalized, regardless of the input. 
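+ *  For example, a normalized input (magnitude 1) gives output magnitude 1,
+ *  and the maximum allowed input magnitude 31 gives output magnitude 16.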
*/ +static void secp256k1_fe_half(secp256k1_fe *r); + #endif /* SECP256K1_FIELD_H */ diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h index aecb6117e7990..627cd5b57bfaf 100644 --- a/src/field_10x26_impl.h +++ b/src/field_10x26_impl.h @@ -1133,6 +1133,82 @@ static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_ #endif } +static SECP256K1_INLINE void secp256k1_fe_half(secp256k1_fe *r) { + uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], + t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; + uint32_t one = (uint32_t)1; + uint32_t mask = -(t0 & one) >> 6; + +#ifdef VERIFY + secp256k1_fe_verify(r); + VERIFY_CHECK(r->magnitude < 32); +#endif + + /* Bounds analysis (over the rationals). + * + * Let m = r->magnitude + * C = 0x3FFFFFFUL * 2 + * D = 0x03FFFFFUL * 2 + * + * Initial bounds: t0..t8 <= C * m + * t9 <= D * m + */ + + t0 += 0x3FFFC2FUL & mask; + t1 += 0x3FFFFBFUL & mask; + t2 += mask; + t3 += mask; + t4 += mask; + t5 += mask; + t6 += mask; + t7 += mask; + t8 += mask; + t9 += mask >> 4; + + VERIFY_CHECK((t0 & one) == 0); + + /* t0..t8: added <= C/2 + * t9: added <= D/2 + * + * Current bounds: t0..t8 <= C * (m + 1/2) + * t9 <= D * (m + 1/2) + */ + + r->n[0] = (t0 >> 1) + ((t1 & one) << 25); + r->n[1] = (t1 >> 1) + ((t2 & one) << 25); + r->n[2] = (t2 >> 1) + ((t3 & one) << 25); + r->n[3] = (t3 >> 1) + ((t4 & one) << 25); + r->n[4] = (t4 >> 1) + ((t5 & one) << 25); + r->n[5] = (t5 >> 1) + ((t6 & one) << 25); + r->n[6] = (t6 >> 1) + ((t7 & one) << 25); + r->n[7] = (t7 >> 1) + ((t8 & one) << 25); + r->n[8] = (t8 >> 1) + ((t9 & one) << 25); + r->n[9] = (t9 >> 1); + + /* t0..t8: shifted right and added <= C/4 + 1/2 + * t9: shifted right + * + * Current bounds: t0..t8 <= C * (m/2 + 1/2) + * t9 <= D * (m/2 + 1/4) + */ + +#ifdef VERIFY + /* Therefore the output magnitude (M) has to be set such that: + * t0..t8: C * M >= C * (m/2 + 1/2) + * t9: D * M >= D * (m/2 + 1/4) + * + * It suffices for all limbs that, for any input magnitude m: + * M >= m/2 + 1/2 + * + * and since we want the smallest such integer value for M: + * M == floor(m/2) + 1 + */ + r->magnitude = (r->magnitude >> 1) + 1; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) { uint32_t mask0, mask1; VG_CHECK_VERIFY(r->n, sizeof(r->n)); diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h index 9b824b92043e6..6e6bb3c649ef4 100644 --- a/src/field_5x52_impl.h +++ b/src/field_5x52_impl.h @@ -477,6 +477,71 @@ static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_ #endif } +static SECP256K1_INLINE void secp256k1_fe_half(secp256k1_fe *r) { + uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; + uint64_t one = (uint64_t)1; + uint64_t mask = -(t0 & one) >> 12; + +#ifdef VERIFY + secp256k1_fe_verify(r); + VERIFY_CHECK(r->magnitude < 32); +#endif + + /* Bounds analysis (over the rationals). 
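+     * The analysis is identical in structure to the 10x26 version above,
+     * with five 52-bit limbs in place of ten 26-bit ones.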
+ * + * Let m = r->magnitude + * C = 0xFFFFFFFFFFFFFULL * 2 + * D = 0x0FFFFFFFFFFFFULL * 2 + * + * Initial bounds: t0..t3 <= C * m + * t4 <= D * m + */ + + t0 += 0xFFFFEFFFFFC2FULL & mask; + t1 += mask; + t2 += mask; + t3 += mask; + t4 += mask >> 4; + + VERIFY_CHECK((t0 & one) == 0); + + /* t0..t3: added <= C/2 + * t4: added <= D/2 + * + * Current bounds: t0..t3 <= C * (m + 1/2) + * t4 <= D * (m + 1/2) + */ + + r->n[0] = (t0 >> 1) + ((t1 & one) << 51); + r->n[1] = (t1 >> 1) + ((t2 & one) << 51); + r->n[2] = (t2 >> 1) + ((t3 & one) << 51); + r->n[3] = (t3 >> 1) + ((t4 & one) << 51); + r->n[4] = (t4 >> 1); + + /* t0..t3: shifted right and added <= C/4 + 1/2 + * t4: shifted right + * + * Current bounds: t0..t3 <= C * (m/2 + 1/2) + * t4 <= D * (m/2 + 1/4) + */ + +#ifdef VERIFY + /* Therefore the output magnitude (M) has to be set such that: + * t0..t3: C * M >= C * (m/2 + 1/2) + * t4: D * M >= D * (m/2 + 1/4) + * + * It suffices for all limbs that, for any input magnitude m: + * M >= m/2 + 1/2 + * + * and since we want the smallest such integer value for M: + * M == floor(m/2) + 1 + */ + r->magnitude = (r->magnitude >> 1) + 1; + r->normalized = 0; + secp256k1_fe_verify(r); +#endif +} + static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) { uint64_t mask0, mask1; VG_CHECK_VERIFY(r->n, sizeof(r->n)); diff --git a/src/group_impl.h b/src/group_impl.h index 7acc2cbbe2c3b..d59beaeb2ebd0 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -492,7 +492,7 @@ static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a, static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b) { - /* Operations: 7 mul, 5 sqr, 4 normalize, 21 mul_int/add/negate/cmov */ + /* Operations: 7 mul, 5 sqr, 24 add/cmov/half/mul_int/negate/normalize_weak/normalizes_to_zero */ secp256k1_fe zz, u1, u2, s1, s2, t, tt, m, n, q, rr; secp256k1_fe m_alt, rr_alt; int infinity, degenerate; @@ -513,11 +513,11 @@ static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const * Z = Z1*Z2 * T = U1+U2 * M = S1+S2 - * Q = T*M^2 + * Q = -T*M^2 * R = T^2-U1*U2 - * X3 = 4*(R^2-Q) - * Y3 = 4*(R*(3*Q-2*R^2)-M^4) - * Z3 = 2*M*Z + * X3 = R^2+Q + * Y3 = -(R*(2*X3+Q)+M^4)/2 + * Z3 = M*Z * (Note that the paper uses xi = Xi / Zi and yi = Yi / Zi instead.) * * This formula has the benefit of being the same for both addition @@ -581,7 +581,8 @@ static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const * and denominator of lambda; R and M represent the explicit * expressions x1^2 + x2^2 + x1x2 and y1 + y2. */ secp256k1_fe_sqr(&n, &m_alt); /* n = Malt^2 (1) */ - secp256k1_fe_mul(&q, &n, &t); /* q = Q = T*Malt^2 (1) */ + secp256k1_fe_negate(&q, &t, 2); /* q = -T (3) */ + secp256k1_fe_mul(&q, &q, &n); /* q = Q = -T*Malt^2 (1) */ /* These two lines use the observation that either M == Malt or M == 0, * so M^3 * Malt is either Malt^4 (which is computed by squaring), or * zero (which is "computed" by cmov). 
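      * In the degenerate case M (and hence m) is a representation of zero,
      * so the cmov of m into n indeed yields zero there.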
So the cost is one squaring @@ -589,21 +590,16 @@ static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_fe_sqr(&n, &n); secp256k1_fe_cmov(&n, &m, degenerate); /* n = M^3 * Malt (2) */ secp256k1_fe_sqr(&t, &rr_alt); /* t = Ralt^2 (1) */ - secp256k1_fe_mul(&r->z, &a->z, &m_alt); /* r->z = Malt*Z (1) */ + secp256k1_fe_mul(&r->z, &a->z, &m_alt); /* r->z = Z3 = Malt*Z (1) */ infinity = secp256k1_fe_normalizes_to_zero(&r->z) & ~a->infinity; - secp256k1_fe_mul_int(&r->z, 2); /* r->z = Z3 = 2*Malt*Z (2) */ - secp256k1_fe_negate(&q, &q, 1); /* q = -Q (2) */ - secp256k1_fe_add(&t, &q); /* t = Ralt^2-Q (3) */ - secp256k1_fe_normalize_weak(&t); - r->x = t; /* r->x = Ralt^2-Q (1) */ - secp256k1_fe_mul_int(&t, 2); /* t = 2*x3 (2) */ - secp256k1_fe_add(&t, &q); /* t = 2*x3 - Q: (4) */ - secp256k1_fe_mul(&t, &t, &rr_alt); /* t = Ralt*(2*x3 - Q) (1) */ - secp256k1_fe_add(&t, &n); /* t = Ralt*(2*x3 - Q) + M^3*Malt (3) */ - secp256k1_fe_negate(&r->y, &t, 3); /* r->y = Ralt*(Q - 2x3) - M^3*Malt (4) */ - secp256k1_fe_normalize_weak(&r->y); - secp256k1_fe_mul_int(&r->x, 4); /* r->x = X3 = 4*(Ralt^2-Q) */ - secp256k1_fe_mul_int(&r->y, 4); /* r->y = Y3 = 4*Ralt*(Q - 2x3) - 4*M^3*Malt (4) */ + secp256k1_fe_add(&t, &q); /* t = Ralt^2 + Q (2) */ + r->x = t; /* r->x = X3 = Ralt^2 + Q (2) */ + secp256k1_fe_mul_int(&t, 2); /* t = 2*X3 (4) */ + secp256k1_fe_add(&t, &q); /* t = 2*X3 + Q (5) */ + secp256k1_fe_mul(&t, &t, &rr_alt); /* t = Ralt*(2*X3 + Q) (1) */ + secp256k1_fe_add(&t, &n); /* t = Ralt*(2*X3 + Q) + M^3*Malt (3) */ + secp256k1_fe_negate(&r->y, &t, 3); /* r->y = -(Ralt*(2*X3 + Q) + M^3*Malt) (4) */ + secp256k1_fe_half(&r->y); /* r->y = Y3 = -(Ralt*(2*X3 + Q) + M^3*Malt)/2 (3) */ /** In case a->infinity == 1, replace r with (b->x, b->y, 1). */ secp256k1_fe_cmov(&r->x, &b->x, a->infinity); From 9cc5c257eddc2d7614985be60bee29cf2bec65fb Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 21 Dec 2021 20:39:55 -0500 Subject: [PATCH 2/7] Add test for secp256k1_fe_half --- src/tests.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/tests.c b/src/tests.c index 8b3ac8140eb39..155c1c7b3b7a4 100644 --- a/src/tests.c +++ b/src/tests.c @@ -2548,6 +2548,14 @@ void run_field_misc(void) { secp256k1_fe_add(&q, &x); CHECK(check_fe_equal(&y, &z)); CHECK(check_fe_equal(&q, &y)); + /* Check secp256k1_fe_half. */ + z = x; + secp256k1_fe_half(&z); + secp256k1_fe_add(&z, &z); + CHECK(check_fe_equal(&x, &z)); + secp256k1_fe_add(&z, &z); + secp256k1_fe_half(&z); + CHECK(check_fe_equal(&x, &z)); } } From 2cbb4b1a424d9dee12a4e11f0479410b7e4cc930 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 21 Dec 2021 20:47:06 -0500 Subject: [PATCH 3/7] Run more iterations of run_field_misc At count=64, this makes the test take around 1% of the total time. --- src/tests.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/tests.c b/src/tests.c index 155c1c7b3b7a4..9da95ca60556a 100644 --- a/src/tests.c +++ b/src/tests.c @@ -2478,9 +2478,13 @@ void run_field_misc(void) { secp256k1_fe q; secp256k1_fe fe5 = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 5); int i, j; - for (i = 0; i < 5*count; i++) { + for (i = 0; i < 1000 * count; i++) { secp256k1_fe_storage xs, ys, zs; - random_fe(&x); + if (i & 1) { + random_fe(&x); + } else { + random_fe_test(&x); + } random_fe_non_zero(&y); /* Test the fe equality and comparison operations. 
*/ CHECK(secp256k1_fe_cmp_var(&x, &x) == 0); From 557b31fac36529948709d4bfcc00ad3acb7e83b9 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 22 Dec 2021 19:39:22 -0500 Subject: [PATCH 4/7] Doubling formula using fe_half --- src/group_impl.h | 57 +++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/src/group_impl.h b/src/group_impl.h index d59beaeb2ebd0..ca9fc3837e5c2 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -271,37 +271,36 @@ static int secp256k1_ge_is_valid_var(const secp256k1_ge *a) { } static SECP256K1_INLINE void secp256k1_gej_double(secp256k1_gej *r, const secp256k1_gej *a) { - /* Operations: 3 mul, 4 sqr, 0 normalize, 12 mul_int/add/negate. - * - * Note that there is an implementation described at - * https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l - * which trades a multiply for a square, but in practice this is actually slower, - * mainly because it requires more normalizations. - */ - secp256k1_fe t1,t2,t3,t4; + secp256k1_fe l, s, t, q; r->infinity = a->infinity; - secp256k1_fe_mul(&r->z, &a->z, &a->y); - secp256k1_fe_mul_int(&r->z, 2); /* Z' = 2*Y*Z (2) */ - secp256k1_fe_sqr(&t1, &a->x); - secp256k1_fe_mul_int(&t1, 3); /* T1 = 3*X^2 (3) */ - secp256k1_fe_sqr(&t2, &t1); /* T2 = 9*X^4 (1) */ - secp256k1_fe_sqr(&t3, &a->y); - secp256k1_fe_mul_int(&t3, 2); /* T3 = 2*Y^2 (2) */ - secp256k1_fe_sqr(&t4, &t3); - secp256k1_fe_mul_int(&t4, 2); /* T4 = 8*Y^4 (2) */ - secp256k1_fe_mul(&t3, &t3, &a->x); /* T3 = 2*X*Y^2 (1) */ - r->x = t3; - secp256k1_fe_mul_int(&r->x, 4); /* X' = 8*X*Y^2 (4) */ - secp256k1_fe_negate(&r->x, &r->x, 4); /* X' = -8*X*Y^2 (5) */ - secp256k1_fe_add(&r->x, &t2); /* X' = 9*X^4 - 8*X*Y^2 (6) */ - secp256k1_fe_negate(&t2, &t2, 1); /* T2 = -9*X^4 (2) */ - secp256k1_fe_mul_int(&t3, 6); /* T3 = 12*X*Y^2 (6) */ - secp256k1_fe_add(&t3, &t2); /* T3 = 12*X*Y^2 - 9*X^4 (8) */ - secp256k1_fe_mul(&r->y, &t1, &t3); /* Y' = 36*X^3*Y^2 - 27*X^6 (1) */ - secp256k1_fe_negate(&t2, &t4, 2); /* T2 = -8*Y^4 (3) */ - secp256k1_fe_add(&r->y, &t2); /* Y' = 36*X^3*Y^2 - 27*X^6 - 8*Y^4 (4) */ + /* Formula used: + * L = (3/2) * X1^2 + * S = Y1^2 + * T = X1*S + * X3 = L^2 - 2*T + * Y3 = L*(T - X3) - S^2 + * Z3 = Y1*Z1 + */ + + secp256k1_fe_mul(&r->z, &a->z, &a->y); /* Z3 = Y1*Z1 (1) */ + secp256k1_fe_sqr(&l, &a->x); /* L = X1^2 (1) */ + secp256k1_fe_mul_int(&l, 3); /* L = 3*X1^2 (3) */ + secp256k1_fe_half(&l); /* L = 3/2*X1^2 (2) */ + secp256k1_fe_sqr(&s, &a->y); /* S = Y1^2 (1) */ + secp256k1_fe_mul(&t, &a->x, &s); /* T = X1*S (1) */ + q = t; + secp256k1_fe_add(&q, &t); /* Q = 2*T (2) */ + secp256k1_fe_negate(&r->x, &q, 2); /* X3 = -2*T (3) */ + secp256k1_fe_sqr(&q, &l); /* Q = L^2 (1) */ + secp256k1_fe_add(&r->x, &q); /* X3 = L^2 - 2*T (4) */ + secp256k1_fe_negate(&q, &r->x, 4); /* Q = -X3 (5) */ + secp256k1_fe_add(&q, &t); /* Q = T-X3 (6) */ + secp256k1_fe_mul(&q, &q, &l); /* Q = L*(T-X3) (1) */ + secp256k1_fe_sqr(&s, &s); + secp256k1_fe_negate(&r->y, &s, 1); /* Y3 = -S^2 (2) */ + secp256k1_fe_add(&r->y, &q); /* Y3 = L*(T-X3) - S^2 (3) */ } static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr) { @@ -325,8 +324,6 @@ static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, s if (rzr != NULL) { *rzr = a->y; - secp256k1_fe_normalize_weak(rzr); - secp256k1_fe_mul_int(rzr, 2); } secp256k1_gej_double(r, a); From 4eb8b932ff8e64f8de3ae8ecfebeab1e84ca420e Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Thu, 23 Dec 2021 
14:18:46 +0700 Subject: [PATCH 5/7] Further improve doubling formula using fe_half --- src/group_impl.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/group_impl.h b/src/group_impl.h index ca9fc3837e5c2..b19b02a01fafb 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -271,36 +271,35 @@ static int secp256k1_ge_is_valid_var(const secp256k1_ge *a) { } static SECP256K1_INLINE void secp256k1_gej_double(secp256k1_gej *r, const secp256k1_gej *a) { - secp256k1_fe l, s, t, q; + /* Operations: 3 mul, 4 sqr, 8 add/half/mul_int/negate */ + secp256k1_fe l, s, t; r->infinity = a->infinity; /* Formula used: * L = (3/2) * X1^2 * S = Y1^2 - * T = X1*S - * X3 = L^2 - 2*T - * Y3 = L*(T - X3) - S^2 + * T = -X1*S + * X3 = L^2 + 2*T + * Y3 = -(L*(X3 + T) + S^2) * Z3 = Y1*Z1 */ secp256k1_fe_mul(&r->z, &a->z, &a->y); /* Z3 = Y1*Z1 (1) */ - secp256k1_fe_sqr(&l, &a->x); /* L = X1^2 (1) */ - secp256k1_fe_mul_int(&l, 3); /* L = 3*X1^2 (3) */ - secp256k1_fe_half(&l); /* L = 3/2*X1^2 (2) */ - secp256k1_fe_sqr(&s, &a->y); /* S = Y1^2 (1) */ - secp256k1_fe_mul(&t, &a->x, &s); /* T = X1*S (1) */ - q = t; - secp256k1_fe_add(&q, &t); /* Q = 2*T (2) */ - secp256k1_fe_negate(&r->x, &q, 2); /* X3 = -2*T (3) */ - secp256k1_fe_sqr(&q, &l); /* Q = L^2 (1) */ - secp256k1_fe_add(&r->x, &q); /* X3 = L^2 - 2*T (4) */ - secp256k1_fe_negate(&q, &r->x, 4); /* Q = -X3 (5) */ - secp256k1_fe_add(&q, &t); /* Q = T-X3 (6) */ - secp256k1_fe_mul(&q, &q, &l); /* Q = L*(T-X3) (1) */ - secp256k1_fe_sqr(&s, &s); - secp256k1_fe_negate(&r->y, &s, 1); /* Y3 = -S^2 (2) */ - secp256k1_fe_add(&r->y, &q); /* Y3 = L*(T-X3) - S^2 (3) */ + secp256k1_fe_sqr(&s, &a->y); /* S = Y1^2 (1) */ + secp256k1_fe_sqr(&l, &a->x); /* L = X1^2 (1) */ + secp256k1_fe_mul_int(&l, 3); /* L = 3*X1^2 (3) */ + secp256k1_fe_half(&l); /* L = 3/2*X1^2 (2) */ + secp256k1_fe_negate(&t, &s, 1); /* T = -S (2) */ + secp256k1_fe_mul(&t, &t, &a->x); /* T = -X1*S (1) */ + secp256k1_fe_sqr(&r->x, &l); /* X3 = L^2 (1) */ + secp256k1_fe_add(&r->x, &t); /* X3 = L^2 + T (2) */ + secp256k1_fe_add(&r->x, &t); /* X3 = L^2 + 2*T (3) */ + secp256k1_fe_sqr(&s, &s); /* S' = S^2 (1) */ + secp256k1_fe_add(&t, &r->x); /* T' = X3 + T (4) */ + secp256k1_fe_mul(&r->y, &t, &l); /* Y3 = L*(X3 + T) (1) */ + secp256k1_fe_add(&r->y, &s); /* Y3 = L*(X3 + T) + S^2 (2) */ + secp256k1_fe_negate(&r->y, &r->y, 2); /* Y3 = -(L*(X3 + T) + S^2) (3) */ } static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr) { @@ -324,6 +323,7 @@ static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, s if (rzr != NULL) { *rzr = a->y; + secp256k1_fe_normalize_weak(rzr); } secp256k1_gej_double(r, a); From d64bb5d4f3fbd48b570d847c9389b9cf8f3d9abc Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Thu, 23 Dec 2021 16:41:10 +0700 Subject: [PATCH 6/7] Add fe_half tests for worst-case inputs - Add field method _fe_get_bounds --- src/field.h | 4 ++++ src/field_10x26_impl.h | 20 +++++++++++++++++ src/field_5x52_impl.h | 15 +++++++++++++ src/tests.c | 50 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+) diff --git a/src/field.h b/src/field.h index a52e056dd2e16..2584a494eeb09 100644 --- a/src/field.h +++ b/src/field.h @@ -135,4 +135,8 @@ static void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag); * The output is not guaranteed to be normalized, regardless of the input. 
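 *  For example, a normalized input (magnitude 1) gives output magnitude 1,
 *  and the maximum allowed input magnitude 31 gives output magnitude 16.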
*/ static void secp256k1_fe_half(secp256k1_fe *r); +/** Sets each limb of 'r' to its upper bound at magnitude 'm'. The output will also have its + * magnitude set to 'm' and is normalized if (and only if) 'm' is zero. */ +static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m); + #endif /* SECP256K1_FIELD_H */ diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h index 627cd5b57bfaf..21742bf6eb6c4 100644 --- a/src/field_10x26_impl.h +++ b/src/field_10x26_impl.h @@ -49,6 +49,26 @@ static void secp256k1_fe_verify(const secp256k1_fe *a) { } #endif +static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m) { + VERIFY_CHECK(m >= 0); + VERIFY_CHECK(m <= 2048); + r->n[0] = 0x3FFFFFFUL * 2 * m; + r->n[1] = 0x3FFFFFFUL * 2 * m; + r->n[2] = 0x3FFFFFFUL * 2 * m; + r->n[3] = 0x3FFFFFFUL * 2 * m; + r->n[4] = 0x3FFFFFFUL * 2 * m; + r->n[5] = 0x3FFFFFFUL * 2 * m; + r->n[6] = 0x3FFFFFFUL * 2 * m; + r->n[7] = 0x3FFFFFFUL * 2 * m; + r->n[8] = 0x3FFFFFFUL * 2 * m; + r->n[9] = 0x03FFFFFUL * 2 * m; +#ifdef VERIFY + r->magnitude = m; + r->normalized = (m == 0); + secp256k1_fe_verify(r); +#endif +} + static void secp256k1_fe_normalize(secp256k1_fe *r) { uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h index 6e6bb3c649ef4..6bd202f587130 100644 --- a/src/field_5x52_impl.h +++ b/src/field_5x52_impl.h @@ -58,6 +58,21 @@ static void secp256k1_fe_verify(const secp256k1_fe *a) { } #endif +static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m) { + VERIFY_CHECK(m >= 0); + VERIFY_CHECK(m <= 2048); + r->n[0] = 0xFFFFFFFFFFFFFULL * 2 * m; + r->n[1] = 0xFFFFFFFFFFFFFULL * 2 * m; + r->n[2] = 0xFFFFFFFFFFFFFULL * 2 * m; + r->n[3] = 0xFFFFFFFFFFFFFULL * 2 * m; + r->n[4] = 0x0FFFFFFFFFFFFULL * 2 * m; +#ifdef VERIFY + r->magnitude = m; + r->normalized = (m == 0); + secp256k1_fe_verify(r); +#endif +} + static void secp256k1_fe_normalize(secp256k1_fe *r) { uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; diff --git a/src/tests.c b/src/tests.c index 9da95ca60556a..26063a81569bc 100644 --- a/src/tests.c +++ b/src/tests.c @@ -2471,6 +2471,55 @@ int fe_identical(const secp256k1_fe *a, const secp256k1_fe *b) { return ret; } +void run_field_half(void) { + secp256k1_fe t, u; + int m; + + /* Check magnitude 0 input */ + secp256k1_fe_get_bounds(&t, 0); + secp256k1_fe_half(&t); +#ifdef VERIFY + CHECK(t.magnitude == 1); + CHECK(t.normalized == 0); +#endif + CHECK(secp256k1_fe_normalizes_to_zero(&t)); + + /* Check non-zero magnitudes in the supported range */ + for (m = 1; m < 32; m++) { + /* Check max-value input */ + secp256k1_fe_get_bounds(&t, m); + + u = t; + secp256k1_fe_half(&u); +#ifdef VERIFY + CHECK(u.magnitude == (m >> 1) + 1); + CHECK(u.normalized == 0); +#endif + secp256k1_fe_normalize_weak(&u); + secp256k1_fe_add(&u, &u); + CHECK(check_fe_equal(&t, &u)); + + /* Check worst-case input: ensure the LSB is 1 so that P will be added, + * which will also cause all carries to be 1, since all limbs that can + * generate a carry are initially even and all limbs of P are odd in + * every existing field implementation. 
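+         * (Concretely, every per-limb addend in _fe_half is odd: 0x3FFFC2F,
+         * 0x3FFFFBF, 0x3FFFFFF and 0x03FFFFF in the 10x26 code;
+         * 0xFFFFEFFFFFC2F, 0xFFFFFFFFFFFFF and 0x0FFFFFFFFFFFF in the
+         * 5x52 code.)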
*/ + secp256k1_fe_get_bounds(&t, m); + CHECK(t.n[0] > 0); + CHECK((t.n[0] & 1) == 0); + --t.n[0]; + + u = t; + secp256k1_fe_half(&u); +#ifdef VERIFY + CHECK(u.magnitude == (m >> 1) + 1); + CHECK(u.normalized == 0); +#endif + secp256k1_fe_normalize_weak(&u); + secp256k1_fe_add(&u, &u); + CHECK(check_fe_equal(&t, &u)); + } +} + void run_field_misc(void) { secp256k1_fe x; secp256k1_fe y; @@ -6924,6 +6973,7 @@ int main(int argc, char **argv) { run_scalar_tests(); /* field tests */ + run_field_half(); run_field_misc(); run_field_convert(); run_fe_mul(); From e848c3799c4f31367c3ed98d17e3b7de504d4c6e Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Mon, 27 Dec 2021 14:02:13 +0700 Subject: [PATCH 7/7] Update sage files for new formulae - formula_secp256k1_gej_double_var - formula_secp256k1_gej_add_ge --- sage/prove_group_implementations.sage | 41 +++++++++++---------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/sage/prove_group_implementations.sage b/sage/prove_group_implementations.sage index a97e732f7fa3a..eb5fe7cfe3667 100644 --- a/sage/prove_group_implementations.sage +++ b/sage/prove_group_implementations.sage @@ -8,25 +8,20 @@ load("weierstrass_prover.sage") def formula_secp256k1_gej_double_var(a): """libsecp256k1's secp256k1_gej_double_var, used by various addition functions""" rz = a.Z * a.Y - rz = rz * 2 - t1 = a.X^2 - t1 = t1 * 3 - t2 = t1^2 - t3 = a.Y^2 - t3 = t3 * 2 - t4 = t3^2 - t4 = t4 * 2 - t3 = t3 * a.X - rx = t3 - rx = rx * 4 - rx = -rx - rx = rx + t2 - t2 = -t2 - t3 = t3 * 6 - t3 = t3 + t2 - ry = t1 * t3 - t2 = -t4 - ry = ry + t2 + s = a.Y^2 + l = a.X^2 + l = l * 3 + l = l / 2 + t = -s + t = t * a.X + rx = l^2 + rx = rx + t + rx = rx + t + s = s^2 + t = t + rx + ry = t * l + ry = ry + s + ry = -ry return jacobianpoint(rx, ry, rz) def formula_secp256k1_gej_add_var(branch, a, b): @@ -197,7 +192,8 @@ def formula_secp256k1_gej_add_ge(branch, a, b): rr_alt = rr m_alt = m n = m_alt^2 - q = n * t + q = -t + q = q * n n = n^2 if degenerate: n = m @@ -210,8 +206,6 @@ def formula_secp256k1_gej_add_ge(branch, a, b): zeroes.update({rz : 'r.z=0'}) else: nonzeroes.update({rz : 'r.z!=0'}) - rz = rz * 2 - q = -q t = t + q rx = t t = t * 2 @@ -219,8 +213,7 @@ def formula_secp256k1_gej_add_ge(branch, a, b): t = t * rr_alt t = t + n ry = -t - rx = rx * 4 - ry = ry * 4 + ry = ry / 2 if a_infinity: rx = b.X ry = b.Y
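
The common trick behind _fe_half in these patches is that the field prime P is odd, so halving never needs an inverse of 2: an even value is simply shifted right, and an odd value first has P added (which makes it even without changing it mod P) and is then shifted. A minimal single-word sketch of the idea follows, using a toy odd modulus M in place of the library's multi-limb field type (both the modulus and the helper names are assumptions for illustration, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Toy odd modulus standing in for secp256k1's P. */
#define M 1000003u

/* Compute x/2 mod M for 0 <= x < M. The branch on the low bit is replaced
 * by a mask, mirroring the constant-time style of secp256k1_fe_half. */
static uint32_t half_mod(uint32_t x) {
    uint32_t mask = -(x & 1u);       /* all ones iff x is odd */
    return (x + (M & mask)) >> 1;    /* x + M is even whenever x is odd */
}

int main(void) {
    uint32_t x = 12345u;
    uint32_t h = half_mod(x);
    printf("%d\n", (2u * h) % M == x);  /* prints 1: doubling undoes the halving */
    return 0;
}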